diff --git "a/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.20.log" "b/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.20.log" new file mode 100644--- /dev/null +++ "b/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.20.log" @@ -0,0 +1,3265 @@ +# Running on gpub006.delta.ncsa.illinois.edu +# Started at Wed Jan 24 22:22:13 CST 2024 +# SLURMD_NODENAME=gpub006 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2892722 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_END_TIME=1706329307 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2892722 +# SLURM_JOB_NAME=exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[006-009,023,030-031,033,047,054,058,070,089-090,093-094]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1706156507 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[006-009,023,030-031,033,047,054,058,070,089-090,093-094]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=60478 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub006 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_5b42678c-f8a7-4a52-a25b-4f9fc34988b9 +/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method 
file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_5b42678c-f8a7-4a52-a25b-4f9fc34988b9 +raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_5b42678c-f8a7-4a52-a25b-4f9fc34988b9 +/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method 
file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_5b42678c-f8a7-4a52-a25b-4f9fc34988b9 +/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_5b42678c-f8a7-4a52-a25b-4f9fc34988b9 +/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_5b42678c-f8a7-4a52-a25b-4f9fc34988b9 +/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_5b42678c-f8a7-4a52-a25b-4f9fc34988b9 +[gpub006:0/64] 2024-01-24 22:25:24,036 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub006:0/64] 2024-01-24 22:25:34,586 
+[gpub006:0/64] 2024-01-24 22:25:34,586 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:25:44,596 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:25:54,607 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:26:04,610 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:26:14,629 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:26:24,655 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:26:34,663 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:26:44,665 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:26:54,671 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:27:04,681 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:27:14,699 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:27:24,723 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:27:34,730 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:27:44,758 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:27:54,788 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:28:04,810 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:28:14,845 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:28:24,855 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:28:34,897 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:28:44,929 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:28:54,968 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:29:04,974 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:29:14,994 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:29:25,041 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:29:35,054 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=24, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:29:48,529 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=32, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:30:05,513 (distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=40, timeout=0:30:00)
+[gpub006:0/64] 2024-01-24 22:30:36,977 
(distributed_c10d:337) INFO: Waiting in store based barrier to initialize process group for rank: 0, key: store_based_barrier_key:1 (world_size=64, worker_count=64, timeout=0:30:00) +[gpub006:0/64] 2024-01-24 22:30:36,977 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. +[gpub006:0/64] 2024-01-24 22:30:37,018 (s2t:420) INFO: Vocabulary size: 50002 +[gpub006:0/64] 2024-01-24 22:30:49,007 (abs_task:1270) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub006:0/64] 2024-01-24 22:30:49,066 (abs_task:1271) INFO: Model structure: +ESPnetS2TCTCModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerCTCEncoder( + (embed): Conv2dSubsampling8( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + (4): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (5): ReLU() + ) + (out): Linear(in_features=9216, out_features=1024, bias=True) + (pos_enc): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): 
Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + 
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, 
kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), 
groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + 
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (9): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + 
(dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (10): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (11): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + 
(channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (12): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (13): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (14): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): 
Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (15): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (16): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, 
out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (17): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + 
(norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (18): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (19): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + 
(dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (20): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (21): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + 
(csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (22): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (23): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (24): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + 
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (25): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (26): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, 
out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (conditioning_layer): Linear(in_features=50002, out_features=1024, bias=True) + ) + (prompt_encoder): TransformerEncoder( + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, 
out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + ) + (embed): Embedding(50002, 512) + (pos_enc): PositionalEncoding( + (dropout): Dropout(p=0.0, inplace=False) + ) + (embed_proj): Linear(in_features=512, out_features=1024, bias=True) + (prompt_proj): Linear(in_features=512, out_features=1024, bias=True) + (ctc): CTC( + (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TCTCModel + Total Number of model parameters: 1.01 B + Number of trainable parameters: 1.01 B (100.0%) + Size: 4.02 GB + Type: torch.float32 +[gpub006:0/64] 2024-01-24 22:30:49,066 (abs_task:1274) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0002 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub006:0/64] 2024-01-24 22:30:49,066 (abs_task:1275) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0002]) +[gpub006:0/64] 2024-01-24 22:30:49,067 (abs_task:1284) INFO: Saving the configuration in exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/config.yaml +[gpub006:0/64] 2024-01-24 22:30:54,440 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-24 22:30:55,335 (abs_task:1660) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-24 22:30:55,335 (abs_task:1661) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub006:0/64] 2024-01-24 22:30:55,336 (abs_task:1662) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-24 22:31:31,409 (trainer:167) INFO: The training was resumed using exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/checkpoint.pth +gpub006:60543:60543 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:60543:60543 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal 
implementation +gpub006:60543:60543 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub006:0/64] 2024-01-24 22:32:15,242 (trainer:298) INFO: 13/45epoch started +[gpub006:0/64] 2024-01-24 22:32:15,281 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub006:0/64] 2024-01-24 22:32:33,304 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-24 22:32:36,661 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-24 22:32:36,661 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub006:0/64] 2024-01-24 22:32:36,664 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub006:60543:60612 [0] NCCL INFO NET/IB : No device found. +gpub006:60543:60612 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.106<0> [1]hsn0:141.142.145.106<0> +gpub006:60543:60612 [0] NCCL INFO Using network Socket +gpub006:60543:60612 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub006:60543:60612 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub006:60543:60612 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub006:60543:60612 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub006:60543:60612 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub006:60543:60612 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub006:60543:60612 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub006:60543:60612 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub006:60543:60612 [0] NCCL INFO Connected all rings +gpub006:60543:60612 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/Socket/1 +gpub006:60543:60612 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub006:60543:60612 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/Socket/1 +gpub006:60543:60612 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub006:60543:60612 [0] NCCL INFO Connected all trees +gpub006:60543:60612 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:60543:60612 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:60543:60612 [0] NCCL INFO comm 0x557e14e02c10 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub006:60546:60546 [3] NCCL INFO cudaDriverVersion 12020 +gpub006:60546:60546 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:60546:60546 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:60546:60614 [3] NCCL INFO NET/IB : No device found. 
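The optimizer block above logs lr: 1.6666666666666667e-09 under PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0002]). A minimal sketch of the piecewise-linear interpolation those settings imply is given below; it is illustrative only, not ESPnet's PiecewiseLinearWarmupLR implementation, and it does not model behaviour past the last breakpoint. One optimizer step into the first segment reproduces the logged value.

```python
# Minimal sketch of the piecewise-linear warmup implied by the logged scheduler
# settings; illustrative interpolation only, not ESPnet's PiecewiseLinearWarmupLR.
def piecewise_linear_warmup_lr(step,
                               warmup_steps=(0, 30000, 60000),
                               warmup_lrs=(0.0, 5e-05, 0.0002)):
    """Linearly interpolate the learning rate between (step, lr) breakpoints."""
    for (s0, s1), (l0, l1) in zip(zip(warmup_steps, warmup_steps[1:]),
                                  zip(warmup_lrs, warmup_lrs[1:])):
        if s0 <= step < s1:
            return l0 + (l1 - l0) * (step - s0) / (s1 - s0)
    # Behaviour after the final breakpoint is not shown in this log; hold the
    # last configured value here purely for illustration.
    return warmup_lrs[-1]

print(piecewise_linear_warmup_lr(1))      # ~1.6666666666666667e-09, the lr logged above
print(piecewise_linear_warmup_lr(30000))  # 5e-05
print(piecewise_linear_warmup_lr(45000))  # 0.000125, halfway through the second segment
```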
+gpub006:60546:60614 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.106<0> [1]hsn0:141.142.145.106<0> +gpub006:60546:60614 [3] NCCL INFO Using network Socket +gpub006:60546:60614 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub006:60546:60614 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub006:60546:60614 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub006:60546:60614 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub006:60546:60614 [3] NCCL INFO Connected all rings +gpub006:60546:60614 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub006:60546:60614 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub006:60546:60614 [3] NCCL INFO Connected all trees +gpub006:60546:60614 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:60546:60614 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:60546:60614 [3] NCCL INFO comm 0x55e59122c6e0 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub006:60545:60545 [2] NCCL INFO cudaDriverVersion 12020 +gpub006:60545:60545 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:60545:60545 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:60545:60615 [2] NCCL INFO NET/IB : No device found. +gpub006:60545:60615 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.106<0> [1]hsn0:141.142.145.106<0> +gpub006:60545:60615 [2] NCCL INFO Using network Socket +gpub006:60545:60615 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub006:60545:60615 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub006:60545:60615 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub006:60545:60615 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub006:60545:60615 [2] NCCL INFO Connected all rings +gpub006:60545:60615 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub006:60545:60615 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub006:60545:60615 [2] NCCL INFO Connected all trees +gpub006:60545:60615 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:60545:60615 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:60545:60615 [2] NCCL INFO comm 0x5624b483a990 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub006:60544:60544 [1] NCCL INFO cudaDriverVersion 12020 +gpub006:60544:60544 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:60544:60544 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:60544:60613 [1] NCCL INFO NET/IB : No device found. 
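The Model summary above reports 1.01 B parameters in torch.float32 and a size of 4.02 GB. A quick illustrative check, assuming the summary uses decimal gigabytes, shows the two figures are consistent at four bytes per parameter:

```python
# Illustrative consistency check of the "Model summary" block above
# (assumes the summary reports decimal gigabytes, i.e. 1 GB = 1e9 bytes).
params = 1.01e9        # "Total Number of model parameters: 1.01 B" (rounded as logged)
bytes_per_param = 4    # torch.float32
print(f"{params * bytes_per_param / 1e9:.2f} GB")  # 4.04 GB vs. the logged 4.02 GB (rounding)
```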
+gpub006:60544:60613 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.106<0> [1]hsn0:141.142.145.106<0> +gpub006:60544:60613 [1] NCCL INFO Using network Socket +gpub006:60544:60613 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub006:60544:60613 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub006:60544:60613 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub006:60544:60613 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub006:60544:60613 [1] NCCL INFO Connected all rings +gpub006:60544:60613 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub006:60544:60613 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub006:60544:60613 [1] NCCL INFO Connected all trees +gpub006:60544:60613 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:60544:60613 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:60544:60613 [1] NCCL INFO comm 0x558d5c02b3d0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub009:694343:694343 [2] NCCL INFO cudaDriverVersion 12020 +gpub009:694343:694343 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.109<0> +gpub009:694343:694343 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub009:694343:694418 [2] NCCL INFO NET/IB : No device found. +gpub009:694343:694418 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.109<0> [1]hsn0:141.142.145.109<0> +gpub009:694343:694418 [2] NCCL INFO Using network Socket +gpub009:694343:694418 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub009:694343:694418 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub009:694343:694418 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub009:694343:694418 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub009:694343:694418 [2] NCCL INFO Connected all rings +gpub009:694343:694418 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub009:694343:694418 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub009:694343:694418 [2] NCCL INFO Connected all trees +gpub009:694343:694418 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub009:694343:694418 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub009:694343:694418 [2] NCCL INFO comm 0x55ef57312670 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub009:694344:694344 [3] NCCL INFO cudaDriverVersion 12020 +gpub009:694344:694344 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.109<0> +gpub009:694344:694344 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub009:694344:694420 [3] NCCL INFO NET/IB : No device found. 
+gpub009:694344:694420 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.109<0> [1]hsn0:141.142.145.109<0> +gpub009:694344:694420 [3] NCCL INFO Using network Socket +gpub009:694344:694420 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub009:694344:694420 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub009:694344:694420 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/Socket/1 +gpub009:694344:694420 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/Socket/1 +gpub009:694344:694420 [3] NCCL INFO Connected all rings +gpub009:694344:694420 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub009:694344:694420 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub009:694344:694420 [3] NCCL INFO Connected all trees +gpub009:694344:694420 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub009:694344:694420 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub009:694344:694420 [3] NCCL INFO comm 0x56182a81abf0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub009:694341:694341 [0] NCCL INFO cudaDriverVersion 12020 +gpub009:694341:694341 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.109<0> +gpub009:694341:694341 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub009:694341:694419 [0] NCCL INFO NET/IB : No device found. +gpub009:694341:694419 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.109<0> [1]hsn0:141.142.145.109<0> +gpub009:694341:694419 [0] NCCL INFO Using network Socket +gpub009:694341:694419 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub009:694341:694419 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub009:694341:694419 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub009:694341:694419 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub009:694341:694419 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub009:694341:694419 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub009:694341:694419 [0] NCCL INFO Connected all rings +gpub009:694341:694419 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub009:694341:694419 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub009:694341:694419 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/Socket/1 +gpub009:694341:694419 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub009:694341:694419 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/Socket/1 +gpub009:694341:694419 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/Socket/1 +gpub009:694341:694419 [0] NCCL INFO Connected all trees +gpub009:694341:694419 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub009:694341:694419 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub009:694341:694419 [0] NCCL INFO comm 0x55d3558a94f0 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub009:694342:694342 [1] NCCL INFO cudaDriverVersion 12020 +gpub009:694342:694342 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.109<0> +gpub009:694342:694342 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub009:694342:694421 [1] NCCL INFO NET/IB : No device found. 
+gpub009:694342:694421 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.109<0> [1]hsn0:141.142.145.109<0> +gpub009:694342:694421 [1] NCCL INFO Using network Socket +gpub009:694342:694421 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub009:694342:694421 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub009:694342:694421 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub009:694342:694421 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub009:694342:694421 [1] NCCL INFO Connected all rings +gpub009:694342:694421 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/Socket/1 +gpub009:694342:694421 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/Socket/1 +gpub009:694342:694421 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub009:694342:694421 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub009:694342:694421 [1] NCCL INFO Connected all trees +gpub009:694342:694421 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub009:694342:694421 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub009:694342:694421 [1] NCCL INFO comm 0x562649169fc0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub054:1587786:1587786 [2] NCCL INFO cudaDriverVersion 12020 +gpub054:1587786:1587786 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:1587786:1587786 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:1587786:1587862 [2] NCCL INFO NET/IB : No device found. +gpub054:1587786:1587862 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.154<0> [1]hsn0:141.142.145.154<0> [2]eth0:fe80::4cdc:ea86:1710:c85d%eth0<0> +gpub054:1587786:1587862 [2] NCCL INFO Using network Socket +gpub054:1587786:1587862 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub054:1587786:1587862 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub054:1587786:1587862 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub054:1587786:1587862 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub054:1587786:1587862 [2] NCCL INFO Connected all rings +gpub054:1587786:1587862 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub054:1587786:1587862 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub054:1587786:1587862 [2] NCCL INFO Connected all trees +gpub054:1587786:1587862 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:1587786:1587862 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:1587786:1587862 [2] NCCL INFO comm 0x559490e7a560 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub054:1587784:1587784 [0] NCCL INFO cudaDriverVersion 12020 +gpub054:1587784:1587784 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:1587784:1587784 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:1587784:1587863 [0] NCCL INFO NET/IB : No device found. 
+gpub054:1587784:1587863 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.154<0> [1]hsn0:141.142.145.154<0> [2]eth0:fe80::4cdc:ea86:1710:c85d%eth0<0> +gpub054:1587784:1587863 [0] NCCL INFO Using network Socket +gpub054:1587784:1587863 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub054:1587784:1587863 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub054:1587784:1587863 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/Socket/1 +gpub054:1587784:1587863 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/Socket/1 +gpub054:1587784:1587863 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub054:1587784:1587863 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub054:1587784:1587863 [0] NCCL INFO Connected all rings +gpub054:1587784:1587863 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/Socket/1 +gpub054:1587784:1587863 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/Socket/1 +gpub054:1587784:1587863 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/Socket/1 +gpub054:1587784:1587863 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/Socket/1 +gpub054:1587784:1587863 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/Socket/1 +gpub054:1587784:1587863 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/Socket/1 +gpub054:1587784:1587863 [0] NCCL INFO Connected all trees +gpub054:1587784:1587863 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:1587784:1587863 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:1587784:1587863 [0] NCCL INFO comm 0x5579d146f200 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub054:1587785:1587785 [1] NCCL INFO cudaDriverVersion 12020 +gpub054:1587785:1587785 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:1587785:1587785 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:1587785:1587864 [1] NCCL INFO NET/IB : No device found. 
+gpub054:1587785:1587864 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.154<0> [1]hsn0:141.142.145.154<0> [2]eth0:fe80::4cdc:ea86:1710:c85d%eth0<0> +gpub054:1587785:1587864 [1] NCCL INFO Using network Socket +gpub054:1587785:1587864 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub054:1587785:1587864 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub054:1587785:1587864 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub054:1587785:1587864 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub054:1587785:1587864 [1] NCCL INFO Connected all rings +gpub054:1587785:1587864 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/Socket/1 +gpub054:1587785:1587864 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/Socket/1 +gpub054:1587785:1587864 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub054:1587785:1587864 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub054:1587785:1587864 [1] NCCL INFO Connected all trees +gpub054:1587785:1587864 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:1587785:1587864 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:1587785:1587864 [1] NCCL INFO comm 0x556032d22140 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub054:1587787:1587787 [3] NCCL INFO cudaDriverVersion 12020 +gpub054:1587787:1587787 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:1587787:1587787 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:1587787:1587861 [3] NCCL INFO NET/IB : No device found. +gpub054:1587787:1587861 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.154<0> [1]hsn0:141.142.145.154<0> [2]eth0:fe80::4cdc:ea86:1710:c85d%eth0<0> +gpub054:1587787:1587861 [3] NCCL INFO Using network Socket +gpub054:1587787:1587861 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub054:1587787:1587861 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub054:1587787:1587861 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/Socket/1 +gpub054:1587787:1587861 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/Socket/1 +gpub054:1587787:1587861 [3] NCCL INFO Connected all rings +gpub054:1587787:1587861 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub054:1587787:1587861 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub054:1587787:1587861 [3] NCCL INFO Connected all trees +gpub054:1587787:1587861 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:1587787:1587861 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:1587787:1587861 [3] NCCL INFO comm 0x5583401b7620 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub058:701975:701975 [3] NCCL INFO cudaDriverVersion 12020 +gpub058:701975:701975 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:701975:701975 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:701975:702054 [3] NCCL INFO NET/IB : No device found. 
+gpub058:701975:702054 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:701975:702054 [3] NCCL INFO Using network Socket +gpub058:701975:702054 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub058:701975:702054 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub058:701975:702054 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/Socket/1 +gpub058:701975:702054 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/Socket/1 +gpub058:701975:702054 [3] NCCL INFO Connected all rings +gpub058:701975:702054 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub023:1117850:1117850 [2] NCCL INFO cudaDriverVersion 12020 +gpub023:1117850:1117850 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0> +gpub023:1117850:1117850 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub023:1117850:1117924 [2] NCCL INFO NET/IB : No device found. +gpub023:1117850:1117924 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.123<0> [1]hsn0:141.142.145.123<0> +gpub023:1117850:1117924 [2] NCCL INFO Using network Socket +gpub023:1117850:1117924 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub023:1117850:1117924 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub023:1117850:1117924 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub023:1117850:1117924 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub023:1117850:1117924 [2] NCCL INFO Connected all rings +gpub023:1117850:1117924 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub058:701975:702054 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub058:701975:702054 [3] NCCL INFO Connected all trees +gpub058:701975:702054 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub058:701975:702054 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:701975:702054 [3] NCCL INFO comm 0x55a7621bbff0 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub023:1117850:1117924 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub023:1117850:1117924 [2] NCCL INFO Connected all trees +gpub023:1117850:1117924 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub023:1117850:1117924 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub023:1117850:1117924 [2] NCCL INFO comm 0x55f96393eca0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub058:701972:701972 [0] NCCL INFO cudaDriverVersion 12020 +gpub058:701972:701972 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:701972:701972 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:701972:702052 [0] NCCL INFO NET/IB : No device found. 
+gpub058:701972:702052 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0>
+gpub058:701972:702052 [0] NCCL INFO Using network Socket
+gpub058:701972:702052 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub058:701972:702052 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpub058:701972:702052 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/Socket/1
+gpub058:701972:702052 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/Socket/1
+gpub058:701972:702052 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub023:1117849:1117849 [1] NCCL INFO cudaDriverVersion 12020
+gpub023:1117849:1117849 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0>
+gpub023:1117849:1117849 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub023:1117849:1117925 [1] NCCL INFO NET/IB : No device found.
+gpub023:1117849:1117925 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.123<0> [1]hsn0:141.142.145.123<0>
+gpub023:1117849:1117925 [1] NCCL INFO Using network Socket
+gpub023:1117849:1117925 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub023:1117849:1117925 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpub023:1117849:1117925 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub023:1117849:1117925 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub023:1117849:1117925 [1] NCCL INFO Connected all rings
+gpub023:1117849:1117925 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/Socket/1
+gpub058:701972:702052 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub058:701972:702052 [0] NCCL INFO Connected all rings
+gpub058:701972:702052 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/Socket/1
+gpub058:701972:702052 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/Socket/1
+gpub058:701972:702052 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/Socket/1
+gpub058:701972:702052 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/Socket/1
+gpub058:701972:702052 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/Socket/1
+gpub058:701972:702052 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/Socket/1
+gpub058:701972:702052 [0] NCCL INFO Connected all trees
+gpub058:701972:702052 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub058:701972:702052 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub023:1117849:1117925 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/Socket/1
+gpub023:1117849:1117925 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub023:1117849:1117925 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub023:1117849:1117925 [1] NCCL INFO Connected all trees
+gpub023:1117849:1117925 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub023:1117849:1117925 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub023:1117849:1117925 [1] NCCL INFO comm 0x56180dbdd0b0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub058:701972:702052 [0] NCCL INFO comm 0x56319f5cac20 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub023:1117848:1117848 [0] NCCL INFO cudaDriverVersion 12020
+gpub023:1117848:1117848 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0>
+gpub023:1117848:1117848 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub023:1117848:1117923 [0] NCCL INFO NET/IB : No device found.
+gpub023:1117848:1117923 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.123<0> [1]hsn0:141.142.145.123<0>
+gpub023:1117848:1117923 [0] NCCL INFO Using network Socket
+gpub023:1117848:1117923 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub023:1117848:1117923 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20
+gpub023:1117848:1117923 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/Socket/1
+gpub023:1117848:1117923 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/Socket/1
+gpub023:1117848:1117923 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub058:701973:701973 [1] NCCL INFO cudaDriverVersion 12020
+gpub058:701973:701973 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0>
+gpub058:701973:701973 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub058:701973:702055 [1] NCCL INFO NET/IB : No device found.
+gpub058:701973:702055 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0>
+gpub058:701973:702055 [1] NCCL INFO Using network Socket
+gpub058:701973:702055 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub058:701973:702055 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40
+gpub058:701973:702055 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub058:701973:702055 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub058:701973:702055 [1] NCCL INFO Connected all rings
+gpub058:701973:702055 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/Socket/1
+gpub023:1117848:1117923 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub023:1117848:1117923 [0] NCCL INFO Connected all rings
+gpub023:1117848:1117923 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/Socket/1
+gpub023:1117848:1117923 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/Socket/1
+gpub023:1117848:1117923 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/Socket/1
+gpub023:1117848:1117923 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/Socket/1
+gpub023:1117848:1117923 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/Socket/1
+gpub023:1117848:1117923 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/Socket/1
+gpub023:1117848:1117923 [0] NCCL INFO Connected all trees
+gpub023:1117848:1117923 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub023:1117848:1117923 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub058:701973:702055 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/Socket/1
+gpub058:701973:702055 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub058:701973:702055 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub058:701973:702055 [1] NCCL INFO Connected all trees
+gpub058:701973:702055 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub058:701973:702055 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub058:701973:702055 [1] NCCL INFO comm 0x55a9a4468b60 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub023:1117848:1117923 [0] NCCL INFO comm 0x561218355ae0 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub023:1117851:1117851 [3] NCCL INFO cudaDriverVersion 12020 +gpub023:1117851:1117851 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0> +gpub023:1117851:1117851 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub023:1117851:1117922 [3] NCCL INFO NET/IB : No device found. +gpub023:1117851:1117922 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.123<0> [1]hsn0:141.142.145.123<0> +gpub023:1117851:1117922 [3] NCCL INFO Using network Socket +gpub023:1117851:1117922 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub023:1117851:1117922 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub023:1117851:1117922 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/Socket/1 +gpub023:1117851:1117922 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/Socket/1 +gpub023:1117851:1117922 [3] NCCL INFO Connected all rings +gpub023:1117851:1117922 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub023:1117851:1117922 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub023:1117851:1117922 [3] NCCL INFO Connected all trees +gpub023:1117851:1117922 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub023:1117851:1117922 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub023:1117851:1117922 [3] NCCL INFO comm 0x563bda588370 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub058:701974:701974 [2] NCCL INFO cudaDriverVersion 12020 +gpub058:701974:701974 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:701974:701974 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:701974:702053 [2] NCCL INFO NET/IB : No device found. +gpub058:701974:702053 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.158<0> [1]hsn0:141.142.145.158<0> [2]eth0:fe80::5e76:5fb4:a207:b9dd%eth0<0> +gpub058:701974:702053 [2] NCCL INFO Using network Socket +gpub058:701974:702053 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub058:701974:702053 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub058:701974:702053 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub058:701974:702053 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub058:701974:702053 [2] NCCL INFO Connected all rings +gpub058:701974:702053 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub058:701974:702053 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub058:701974:702053 [2] NCCL INFO Connected all trees +gpub058:701974:702053 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub058:701974:702053 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:701974:702053 [2] NCCL INFO comm 0x562c5ca2f290 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub047:1542472:1542472 [0] NCCL INFO cudaDriverVersion 12020 +gpub047:1542472:1542472 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.147<0> +gpub047:1542472:1542472 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub047:1542472:1542546 [0] NCCL INFO NET/IB : No device found. 
+gpub047:1542472:1542546 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.147<0> [1]hsn0:141.142.145.147<0> +gpub047:1542472:1542546 [0] NCCL INFO Using network Socket +gpub047:1542472:1542546 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub047:1542472:1542546 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub047:1542472:1542546 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/Socket/1 +gpub047:1542472:1542546 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/Socket/1 +gpub047:1542472:1542546 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub047:1542472:1542546 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub047:1542472:1542546 [0] NCCL INFO Connected all rings +gpub047:1542472:1542546 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/Socket/1 +gpub047:1542472:1542546 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/Socket/1 +gpub047:1542472:1542546 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/Socket/1 +gpub047:1542472:1542546 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/Socket/1 +gpub047:1542472:1542546 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/Socket/1 +gpub047:1542472:1542546 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/Socket/1 +gpub047:1542472:1542546 [0] NCCL INFO Connected all trees +gpub047:1542472:1542546 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub047:1542472:1542546 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub047:1542472:1542546 [0] NCCL INFO comm 0x5589de5eb2a0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub047:1542473:1542473 [1] NCCL INFO cudaDriverVersion 12020 +gpub047:1542473:1542473 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.147<0> +gpub047:1542473:1542473 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub047:1542473:1542547 [1] NCCL INFO NET/IB : No device found. 
+gpub047:1542473:1542547 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.147<0> [1]hsn0:141.142.145.147<0> +gpub047:1542473:1542547 [1] NCCL INFO Using network Socket +gpub047:1542473:1542547 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub047:1542473:1542547 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub047:1542473:1542547 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub047:1542473:1542547 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub047:1542473:1542547 [1] NCCL INFO Connected all rings +gpub047:1542473:1542547 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/Socket/1 +gpub047:1542473:1542547 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/Socket/1 +gpub047:1542473:1542547 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub047:1542473:1542547 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub047:1542473:1542547 [1] NCCL INFO Connected all trees +gpub047:1542473:1542547 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub047:1542473:1542547 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub047:1542473:1542547 [1] NCCL INFO comm 0x5588ab5f5b60 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub047:1542474:1542474 [2] NCCL INFO cudaDriverVersion 12020 +gpub047:1542474:1542474 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.147<0> +gpub047:1542474:1542474 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub047:1542474:1542549 [2] NCCL INFO NET/IB : No device found. +gpub047:1542474:1542549 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.147<0> [1]hsn0:141.142.145.147<0> +gpub047:1542474:1542549 [2] NCCL INFO Using network Socket +gpub047:1542474:1542549 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub047:1542474:1542549 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub047:1542474:1542549 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub047:1542474:1542549 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub047:1542474:1542549 [2] NCCL INFO Connected all rings +gpub047:1542474:1542549 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub047:1542474:1542549 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub047:1542474:1542549 [2] NCCL INFO Connected all trees +gpub047:1542474:1542549 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub047:1542474:1542549 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub047:1542474:1542549 [2] NCCL INFO comm 0x5623d6c3f840 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub047:1542475:1542475 [3] NCCL INFO cudaDriverVersion 12020 +gpub047:1542475:1542475 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.147<0> +gpub047:1542475:1542475 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub047:1542475:1542548 [3] NCCL INFO NET/IB : No device found. 
+gpub047:1542475:1542548 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.147<0> [1]hsn0:141.142.145.147<0> +gpub047:1542475:1542548 [3] NCCL INFO Using network Socket +gpub047:1542475:1542548 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub047:1542475:1542548 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub047:1542475:1542548 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/Socket/1 +gpub047:1542475:1542548 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/Socket/1 +gpub047:1542475:1542548 [3] NCCL INFO Connected all rings +gpub047:1542475:1542548 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub047:1542475:1542548 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub047:1542475:1542548 [3] NCCL INFO Connected all trees +gpub047:1542475:1542548 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub047:1542475:1542548 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub047:1542475:1542548 [3] NCCL INFO comm 0x560d037255a0 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub030:1343661:1343661 [1] NCCL INFO cudaDriverVersion 12020 +gpub030:1343661:1343661 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:1343661:1343661 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:1343661:1343771 [1] NCCL INFO NET/IB : No device found. +gpub030:1343661:1343771 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.130<0> [1]hsn0:141.142.145.130<0> +gpub030:1343661:1343771 [1] NCCL INFO Using network Socket +gpub030:1343661:1343771 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub030:1343661:1343771 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub030:1343661:1343771 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub030:1343661:1343771 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub030:1343661:1343771 [1] NCCL INFO Connected all rings +gpub030:1343661:1343771 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/Socket/1 +gpub030:1343661:1343771 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/Socket/1 +gpub030:1343661:1343771 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub030:1343661:1343771 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub030:1343661:1343771 [1] NCCL INFO Connected all trees +gpub030:1343661:1343771 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:1343661:1343771 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:1343661:1343771 [1] NCCL INFO comm 0x55a00a47c260 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub030:1343662:1343662 [2] NCCL INFO cudaDriverVersion 12020 +gpub030:1343662:1343662 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:1343662:1343662 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:1343662:1343768 [2] NCCL INFO NET/IB : No device found. 
+gpub030:1343662:1343768 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.130<0> [1]hsn0:141.142.145.130<0> +gpub030:1343662:1343768 [2] NCCL INFO Using network Socket +gpub030:1343662:1343768 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub030:1343662:1343768 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub030:1343662:1343768 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub030:1343662:1343768 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub030:1343662:1343768 [2] NCCL INFO Connected all rings +gpub030:1343662:1343768 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub030:1343662:1343768 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub030:1343662:1343768 [2] NCCL INFO Connected all trees +gpub030:1343662:1343768 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:1343662:1343768 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:1343662:1343768 [2] NCCL INFO comm 0x55bd31f360f0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub030:1343663:1343663 [3] NCCL INFO cudaDriverVersion 12020 +gpub030:1343663:1343663 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:1343663:1343663 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:1343663:1343769 [3] NCCL INFO NET/IB : No device found. +gpub030:1343663:1343769 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.130<0> [1]hsn0:141.142.145.130<0> +gpub030:1343663:1343769 [3] NCCL INFO Using network Socket +gpub030:1343663:1343769 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub030:1343663:1343769 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub030:1343663:1343769 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/Socket/1 +gpub030:1343663:1343769 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/Socket/1 +gpub030:1343663:1343769 [3] NCCL INFO Connected all rings +gpub030:1343663:1343769 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub030:1343663:1343769 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub030:1343663:1343769 [3] NCCL INFO Connected all trees +gpub030:1343663:1343769 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:1343663:1343769 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:1343663:1343769 [3] NCCL INFO comm 0x55e0ff9d02c0 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub030:1343660:1343660 [0] NCCL INFO cudaDriverVersion 12020 +gpub030:1343660:1343660 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:1343660:1343660 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:1343660:1343770 [0] NCCL INFO NET/IB : No device found. 
+gpub030:1343660:1343770 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.130<0> [1]hsn0:141.142.145.130<0> +gpub030:1343660:1343770 [0] NCCL INFO Using network Socket +gpub030:1343660:1343770 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub030:1343660:1343770 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub030:1343660:1343770 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/Socket/1 +gpub030:1343660:1343770 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/Socket/1 +gpub030:1343660:1343770 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub030:1343660:1343770 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub030:1343660:1343770 [0] NCCL INFO Connected all rings +gpub030:1343660:1343770 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/Socket/1 +gpub030:1343660:1343770 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/Socket/1 +gpub030:1343660:1343770 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/Socket/1 +gpub030:1343660:1343770 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/Socket/1 +gpub030:1343660:1343770 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/Socket/1 +gpub030:1343660:1343770 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/Socket/1 +gpub030:1343660:1343770 [0] NCCL INFO Connected all trees +gpub030:1343660:1343770 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:1343660:1343770 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:1343660:1343770 [0] NCCL INFO comm 0x5587e3102700 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub033:637234:637234 [0] NCCL INFO cudaDriverVersion 12020 +gpub033:637234:637234 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.133<0> +gpub033:637234:637234 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub033:637234:637343 [0] NCCL INFO NET/IB : No device found. 
+gpub033:637234:637343 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.133<0> [1]hsn0:141.142.145.133<0> +gpub033:637234:637343 [0] NCCL INFO Using network Socket +gpub033:637234:637343 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub033:637234:637343 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub033:637234:637343 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/Socket/1 +gpub033:637234:637343 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/Socket/1 +gpub033:637234:637343 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub033:637234:637343 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub033:637234:637343 [0] NCCL INFO Connected all rings +gpub033:637234:637343 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/Socket/1 +gpub033:637234:637343 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/Socket/1 +gpub033:637234:637343 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/Socket/1 +gpub033:637234:637343 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/Socket/1 +gpub033:637234:637343 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/Socket/1 +gpub033:637234:637343 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/Socket/1 +gpub033:637234:637343 [0] NCCL INFO Connected all trees +gpub033:637234:637343 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub033:637234:637343 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub033:637234:637343 [0] NCCL INFO comm 0x55adb72bcfe0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub031:2691935:2691935 [3] NCCL INFO cudaDriverVersion 12020 +gpub031:2691935:2691935 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:2691935:2691935 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:2691935:2692009 [3] NCCL INFO NET/IB : No device found. +gpub031:2691935:2692009 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.131<0> [1]hsn0:141.142.145.131<0> +gpub031:2691935:2692009 [3] NCCL INFO Using network Socket +gpub031:2691935:2692009 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub031:2691935:2692009 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub031:2691935:2692009 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/Socket/1 +gpub031:2691935:2692009 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/Socket/1 +gpub031:2691935:2692009 [3] NCCL INFO Connected all rings +gpub031:2691935:2692009 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub031:2691935:2692009 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub031:2691935:2692009 [3] NCCL INFO Connected all trees +gpub031:2691935:2692009 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:2691935:2692009 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:2691935:2692009 [3] NCCL INFO comm 0x55d515fe80b0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub070:139012:139012 [3] NCCL INFO cudaDriverVersion 12020 +gpub070:139012:139012 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.170<0> +gpub070:139012:139012 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub070:139012:139069 [3] NCCL INFO NET/IB : No device found. 
+gpub070:139012:139069 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.170<0> [1]hsn0:141.142.145.170<0> [2]eth0:fe80::d7b5:1538:35b8:9e72%eth0<0> +gpub070:139012:139069 [3] NCCL INFO Using network Socket +gpub070:139012:139069 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub070:139012:139069 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub070:139012:139069 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/Socket/1 +gpub070:139012:139069 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/Socket/1 +gpub070:139012:139069 [3] NCCL INFO Connected all rings +gpub070:139012:139069 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub070:139012:139069 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub070:139012:139069 [3] NCCL INFO Connected all trees +gpub070:139012:139069 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub070:139012:139069 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub070:139012:139069 [3] NCCL INFO comm 0x5644c9f7ba00 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub007:700944:700944 [3] NCCL INFO cudaDriverVersion 12020 +gpub007:700944:700944 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:700944:700944 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:700944:701009 [3] NCCL INFO NET/IB : No device found. +gpub007:700944:701009 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.107<0> [1]hsn0:141.142.145.107<0> +gpub007:700944:701009 [3] NCCL INFO Using network Socket +gpub007:700944:701009 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub007:700944:701009 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub007:700944:701009 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub007:700944:701009 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub007:700944:701009 [3] NCCL INFO Connected all rings +gpub007:700944:701009 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub007:700944:701009 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub007:700944:701009 [3] NCCL INFO Connected all trees +gpub007:700944:701009 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:700944:701009 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:700944:701009 [3] NCCL INFO comm 0x55bbe748b190 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub007:700943:700943 [2] NCCL INFO cudaDriverVersion 12020 +gpub007:700943:700943 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:700943:700943 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:700943:701011 [2] NCCL INFO NET/IB : No device found. 
+gpub007:700943:701011 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.107<0> [1]hsn0:141.142.145.107<0> +gpub007:700943:701011 [2] NCCL INFO Using network Socket +gpub007:700943:701011 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub007:700943:701011 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub007:700943:701011 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub007:700943:701011 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub007:700943:701011 [2] NCCL INFO Connected all rings +gpub007:700943:701011 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub007:700943:701011 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub007:700943:701011 [2] NCCL INFO Connected all trees +gpub007:700943:701011 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:700943:701011 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:700943:701011 [2] NCCL INFO comm 0x56433fa4bb70 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub007:700942:700942 [1] NCCL INFO cudaDriverVersion 12020 +gpub007:700942:700942 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:700942:700942 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:700942:701010 [1] NCCL INFO NET/IB : No device found. +gpub007:700942:701010 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.107<0> [1]hsn0:141.142.145.107<0> +gpub007:700942:701010 [1] NCCL INFO Using network Socket +gpub007:700942:701010 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub007:700942:701010 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub007:700942:701010 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub007:700942:701010 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub007:700942:701010 [1] NCCL INFO Connected all rings +gpub007:700942:701010 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/Socket/1 +gpub007:700942:701010 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/Socket/1 +gpub007:700942:701010 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub007:700942:701010 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub007:700942:701010 [1] NCCL INFO Connected all trees +gpub007:700942:701010 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:700942:701010 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:700942:701010 [1] NCCL INFO comm 0x561d936d4270 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub031:2691933:2691933 [1] NCCL INFO cudaDriverVersion 12020 +gpub031:2691933:2691933 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:2691933:2691933 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:2691933:2692008 [1] NCCL INFO NET/IB : No device found. 
+gpub031:2691933:2692008 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.131<0> [1]hsn0:141.142.145.131<0> +gpub031:2691933:2692008 [1] NCCL INFO Using network Socket +gpub031:2691933:2692008 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub031:2691933:2692008 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub031:2691933:2692008 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub031:2691933:2692008 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub031:2691933:2692008 [1] NCCL INFO Connected all rings +gpub031:2691933:2692008 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/Socket/1 +gpub031:2691933:2692008 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/Socket/1 +gpub031:2691933:2692008 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub031:2691933:2692008 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub031:2691933:2692008 [1] NCCL INFO Connected all trees +gpub031:2691933:2692008 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:2691933:2692008 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:2691933:2692008 [1] NCCL INFO comm 0x5565b5ba2490 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub033:637237:637237 [3] NCCL INFO cudaDriverVersion 12020 +gpub033:637237:637237 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.133<0> +gpub033:637237:637237 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub033:637237:637340 [3] NCCL INFO NET/IB : No device found. +gpub033:637237:637340 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.133<0> [1]hsn0:141.142.145.133<0> +gpub033:637237:637340 [3] NCCL INFO Using network Socket +gpub033:637237:637340 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub033:637237:637340 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub033:637237:637340 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/Socket/1 +gpub033:637237:637340 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/Socket/1 +gpub033:637237:637340 [3] NCCL INFO Connected all rings +gpub033:637237:637340 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub031:2691932:2691932 [0] NCCL INFO cudaDriverVersion 12020 +gpub031:2691932:2691932 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:2691932:2691932 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:2691932:2692007 [0] NCCL INFO NET/IB : No device found. 
+gpub031:2691932:2692007 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.131<0> [1]hsn0:141.142.145.131<0> +gpub031:2691932:2692007 [0] NCCL INFO Using network Socket +gpub031:2691932:2692007 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub031:2691932:2692007 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub031:2691932:2692007 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/Socket/1 +gpub031:2691932:2692007 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/Socket/1 +gpub031:2691932:2692007 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub033:637237:637340 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub033:637237:637340 [3] NCCL INFO Connected all trees +gpub033:637237:637340 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub033:637237:637340 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub033:637237:637340 [3] NCCL INFO comm 0x55b77eaa3690 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:2691932:2692007 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub031:2691932:2692007 [0] NCCL INFO Connected all rings +gpub031:2691932:2692007 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/Socket/1 +gpub031:2691932:2692007 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/Socket/1 +gpub031:2691932:2692007 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/Socket/1 +gpub031:2691932:2692007 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/Socket/1 +gpub031:2691932:2692007 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/Socket/1 +gpub031:2691932:2692007 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/Socket/1 +gpub031:2691932:2692007 [0] NCCL INFO Connected all trees +gpub031:2691932:2692007 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:2691932:2692007 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub070:139010:139010 [1] NCCL INFO cudaDriverVersion 12020 +gpub070:139010:139010 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.170<0> +gpub070:139010:139010 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub070:139010:139071 [1] NCCL INFO NET/IB : No device found. +gpub070:139010:139071 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.170<0> [1]hsn0:141.142.145.170<0> [2]eth0:fe80::d7b5:1538:35b8:9e72%eth0<0> +gpub070:139010:139071 [1] NCCL INFO Using network Socket +gpub070:139010:139071 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub070:139010:139071 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub070:139010:139071 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub070:139010:139071 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub070:139010:139071 [1] NCCL INFO Connected all rings +gpub070:139010:139071 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/Socket/1 +gpub008:706295:706295 [0] NCCL INFO cudaDriverVersion 12020 +gpub008:706295:706295 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:706295:706295 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:706295:706403 [0] NCCL INFO NET/IB : No device found. 
+gpub008:706295:706403 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.108<0> [1]hsn0:141.142.145.108<0> +gpub008:706295:706403 [0] NCCL INFO Using network Socket +gpub008:706295:706403 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub008:706295:706403 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub008:706295:706403 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub008:706295:706403 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub008:706295:706403 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub008:706295:706403 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub031:2691932:2692007 [0] NCCL INFO comm 0x56135bf82070 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub070:139010:139071 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/Socket/1 +gpub070:139010:139071 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub070:139010:139071 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub070:139010:139071 [1] NCCL INFO Connected all trees +gpub070:139010:139071 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub070:139010:139071 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub070:139010:139071 [1] NCCL INFO comm 0x55faac9373d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:706295:706403 [0] NCCL INFO Connected all rings +gpub008:706295:706403 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/Socket/1 +gpub008:706295:706403 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/Socket/1 +gpub008:706295:706403 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/Socket/1 +gpub008:706295:706403 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/Socket/1 +gpub008:706295:706403 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub008:706295:706403 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/Socket/1 +gpub008:706295:706403 [0] NCCL INFO Connected all trees +gpub008:706295:706403 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:706295:706403 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:706295:706403 [0] NCCL INFO comm 0x562efbffee80 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub070:139009:139009 [0] NCCL INFO cudaDriverVersion 12020 +gpub070:139009:139009 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.170<0> +gpub070:139009:139009 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub070:139009:139068 [0] NCCL INFO NET/IB : No device found. 
+gpub070:139009:139068 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.170<0> [1]hsn0:141.142.145.170<0> [2]eth0:fe80::d7b5:1538:35b8:9e72%eth0<0> +gpub070:139009:139068 [0] NCCL INFO Using network Socket +gpub070:139009:139068 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub070:139009:139068 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpub070:139009:139068 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/Socket/1 +gpub070:139009:139068 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/Socket/1 +gpub070:139009:139068 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub008:706298:706298 [3] NCCL INFO cudaDriverVersion 12020 +gpub008:706298:706298 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:706298:706298 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:706298:706406 [3] NCCL INFO NET/IB : No device found. +gpub008:706298:706406 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.108<0> [1]hsn0:141.142.145.108<0> +gpub008:706298:706406 [3] NCCL INFO Using network Socket +gpub008:706298:706406 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub008:706298:706406 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub008:706298:706406 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub008:706298:706406 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub008:706298:706406 [3] NCCL INFO Connected all rings +gpub008:706298:706406 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub031:2691934:2691934 [2] NCCL INFO cudaDriverVersion 12020 +gpub031:2691934:2691934 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:2691934:2691934 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:2691934:2692010 [2] NCCL INFO NET/IB : No device found. +gpub031:2691934:2692010 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.131<0> [1]hsn0:141.142.145.131<0> +gpub031:2691934:2692010 [2] NCCL INFO Using network Socket +gpub031:2691934:2692010 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub031:2691934:2692010 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub031:2691934:2692010 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub031:2691934:2692010 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub031:2691934:2692010 [2] NCCL INFO Connected all rings +gpub031:2691934:2692010 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub033:637235:637235 [1] NCCL INFO cudaDriverVersion 12020 +gpub033:637235:637235 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.133<0> +gpub033:637235:637235 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub033:637235:637342 [1] NCCL INFO NET/IB : No device found. 
+gpub033:637235:637342 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.133<0> [1]hsn0:141.142.145.133<0> +gpub033:637235:637342 [1] NCCL INFO Using network Socket +gpub033:637235:637342 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub033:637235:637342 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub033:637235:637342 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub033:637235:637342 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub033:637235:637342 [1] NCCL INFO Connected all rings +gpub033:637235:637342 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/Socket/1 +gpub070:139009:139068 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub070:139009:139068 [0] NCCL INFO Connected all rings +gpub070:139009:139068 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/Socket/1 +gpub070:139009:139068 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/Socket/1 +gpub070:139009:139068 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/Socket/1 +gpub070:139009:139068 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/Socket/1 +gpub070:139009:139068 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/Socket/1 +gpub070:139009:139068 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/Socket/1 +gpub070:139009:139068 [0] NCCL INFO Connected all trees +gpub070:139009:139068 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub070:139009:139068 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:706298:706406 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub008:706298:706406 [3] NCCL INFO Connected all trees +gpub008:706298:706406 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:706298:706406 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:706298:706406 [3] NCCL INFO comm 0x557c18f17cb0 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub008:706296:706296 [1] NCCL INFO cudaDriverVersion 12020 +gpub008:706296:706296 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:706296:706296 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:706296:706405 [1] NCCL INFO NET/IB : No device found. +gpub008:706296:706405 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.108<0> [1]hsn0:141.142.145.108<0> +gpub008:706296:706405 [1] NCCL INFO Using network Socket +gpub008:706296:706405 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub007:700941:700941 [0] NCCL INFO cudaDriverVersion 12020 +gpub007:700941:700941 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:700941:700941 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:700941:701008 [0] NCCL INFO NET/IB : No device found. 
+gpub007:700941:701008 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.107<0> [1]hsn0:141.142.145.107<0> +gpub007:700941:701008 [0] NCCL INFO Using network Socket +gpub007:700941:701008 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub007:700941:701008 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub007:700941:701008 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub007:700941:701008 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub007:700941:701008 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub007:700941:701008 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub031:2691934:2692010 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub031:2691934:2692010 [2] NCCL INFO Connected all trees +gpub031:2691934:2692010 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:2691934:2692010 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:2691934:2692010 [2] NCCL INFO comm 0x55ce69f352a0 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub033:637235:637342 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/Socket/1 +gpub033:637235:637342 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub033:637235:637342 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub033:637235:637342 [1] NCCL INFO Connected all trees +gpub033:637235:637342 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub033:637235:637342 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub033:637235:637342 [1] NCCL INFO comm 0x55aa9d4522e0 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub070:139009:139068 [0] NCCL INFO comm 0x55d3c3e48ed0 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:706296:706405 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub008:706296:706405 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub008:706296:706405 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub008:706296:706405 [1] NCCL INFO Connected all rings +gpub008:706296:706405 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/Socket/1 +gpub008:706296:706405 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/Socket/1 +gpub008:706296:706405 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub008:706296:706405 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub008:706296:706405 [1] NCCL INFO Connected all trees +gpub008:706296:706405 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:706296:706405 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:706296:706405 [1] NCCL INFO comm 0x5564e505aed0 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub007:700941:701008 [0] NCCL INFO Connected all rings +gpub007:700941:701008 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub007:700941:701008 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/Socket/1 +gpub007:700941:701008 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/Socket/1 +gpub007:700941:701008 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub007:700941:701008 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/Socket/1 +gpub007:700941:701008 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/Socket/1 
+gpub007:700941:701008 [0] NCCL INFO Connected all trees +gpub007:700941:701008 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:700941:701008 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:700941:701008 [0] NCCL INFO comm 0x5622c0444080 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub070:139011:139011 [2] NCCL INFO cudaDriverVersion 12020 +gpub070:139011:139011 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.170<0> +gpub070:139011:139011 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub070:139011:139070 [2] NCCL INFO NET/IB : No device found. +gpub070:139011:139070 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.170<0> [1]hsn0:141.142.145.170<0> [2]eth0:fe80::d7b5:1538:35b8:9e72%eth0<0> +gpub070:139011:139070 [2] NCCL INFO Using network Socket +gpub070:139011:139070 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub070:139011:139070 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub070:139011:139070 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub070:139011:139070 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub070:139011:139070 [2] NCCL INFO Connected all rings +gpub070:139011:139070 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub008:706297:706297 [2] NCCL INFO cudaDriverVersion 12020 +gpub008:706297:706297 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:706297:706297 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:706297:706404 [2] NCCL INFO NET/IB : No device found. +gpub008:706297:706404 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.108<0> [1]hsn0:141.142.145.108<0> +gpub008:706297:706404 [2] NCCL INFO Using network Socket +gpub008:706297:706404 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub008:706297:706404 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub008:706297:706404 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub008:706297:706404 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub008:706297:706404 [2] NCCL INFO Connected all rings +gpub008:706297:706404 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub070:139011:139070 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub070:139011:139070 [2] NCCL INFO Connected all trees +gpub070:139011:139070 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub070:139011:139070 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub070:139011:139070 [2] NCCL INFO comm 0x560a0e3924a0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub008:706297:706404 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub008:706297:706404 [2] NCCL INFO Connected all trees +gpub008:706297:706404 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:706297:706404 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:706297:706404 [2] NCCL INFO comm 0x5581acbd1790 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub033:637236:637236 [2] NCCL INFO cudaDriverVersion 12020 +gpub033:637236:637236 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.133<0> +gpub033:637236:637236 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub033:637236:637341 [2] NCCL INFO NET/IB : No device found. 
+gpub033:637236:637341 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.133<0> [1]hsn0:141.142.145.133<0> +gpub033:637236:637341 [2] NCCL INFO Using network Socket +gpub033:637236:637341 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub033:637236:637341 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpub033:637236:637341 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub033:637236:637341 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub033:637236:637341 [2] NCCL INFO Connected all rings +gpub033:637236:637341 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub033:637236:637341 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub033:637236:637341 [2] NCCL INFO Connected all trees +gpub033:637236:637341 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub033:637236:637341 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub033:637236:637341 [2] NCCL INFO comm 0x563da8ef92c0 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub094:60545:60545 [0] NCCL INFO cudaDriverVersion 12020 +gpub094:60545:60545 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.194<0> +gpub094:60545:60545 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub094:60545:60627 [0] NCCL INFO NET/IB : No device found. +gpub094:60545:60627 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.194<0> [1]hsn0:141.142.145.194<0> [2]eth0:fe80::c101:6cdd:3eb9:2754%eth0<0> +gpub094:60545:60627 [0] NCCL INFO Using network Socket +gpub094:60545:60627 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub094:60545:60627 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub094:60545:60627 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/Socket/1 +gpub094:60545:60627 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/Socket/1 +gpub094:60545:60627 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub094:60545:60627 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub094:60545:60627 [0] NCCL INFO Connected all rings +gpub094:60545:60627 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/Socket/1 +gpub094:60545:60627 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/Socket/1 +gpub094:60545:60627 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/Socket/1 +gpub094:60545:60627 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/Socket/1 +gpub094:60545:60627 [0] NCCL INFO Connected all trees +gpub094:60545:60627 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub094:60545:60627 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub094:60545:60627 [0] NCCL INFO comm 0x55ca3d9dfd60 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub094:60546:60546 [1] NCCL INFO cudaDriverVersion 12020 +gpub094:60546:60546 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.194<0> +gpub094:60546:60546 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub094:60546:60629 [1] NCCL INFO NET/IB : No device found. 
+gpub094:60546:60629 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.194<0> [1]hsn0:141.142.145.194<0> [2]eth0:fe80::c101:6cdd:3eb9:2754%eth0<0> +gpub094:60546:60629 [1] NCCL INFO Using network Socket +gpub094:60546:60629 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub094:60546:60629 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub094:60546:60629 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub094:60546:60629 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub094:60546:60629 [1] NCCL INFO Connected all rings +gpub094:60546:60629 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub094:60546:60629 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub094:60546:60629 [1] NCCL INFO Connected all trees +gpub094:60546:60629 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub094:60546:60629 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub094:60546:60629 [1] NCCL INFO comm 0x557937b84ab0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub094:60548:60548 [3] NCCL INFO cudaDriverVersion 12020 +gpub094:60548:60548 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.194<0> +gpub094:60548:60548 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub094:60548:60628 [3] NCCL INFO NET/IB : No device found. +gpub094:60548:60628 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.194<0> [1]hsn0:141.142.145.194<0> [2]eth0:fe80::c101:6cdd:3eb9:2754%eth0<0> +gpub094:60548:60628 [3] NCCL INFO Using network Socket +gpub094:60548:60628 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub094:60548:60628 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub094:60548:60628 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub094:60548:60628 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub094:60548:60628 [3] NCCL INFO Connected all rings +gpub094:60548:60628 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub094:60548:60628 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub094:60548:60628 [3] NCCL INFO Connected all trees +gpub094:60548:60628 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub094:60548:60628 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub094:60548:60628 [3] NCCL INFO comm 0x56460e9b51f0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub094:60547:60547 [2] NCCL INFO cudaDriverVersion 12020 +gpub094:60547:60547 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.194<0> +gpub094:60547:60547 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub094:60547:60630 [2] NCCL INFO NET/IB : No device found. 
+gpub094:60547:60630 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.194<0> [1]hsn0:141.142.145.194<0> [2]eth0:fe80::c101:6cdd:3eb9:2754%eth0<0> +gpub094:60547:60630 [2] NCCL INFO Using network Socket +gpub094:60547:60630 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub094:60547:60630 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub094:60547:60630 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub094:60547:60630 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub094:60547:60630 [2] NCCL INFO Connected all rings +gpub094:60547:60630 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub094:60547:60630 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub094:60547:60630 [2] NCCL INFO Connected all trees +gpub094:60547:60630 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub094:60547:60630 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub094:60547:60630 [2] NCCL INFO comm 0x564669194bb0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub093:60690:60690 [0] NCCL INFO cudaDriverVersion 12020 +gpub093:60690:60690 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.193<0> +gpub093:60690:60690 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub093:60690:60748 [0] NCCL INFO NET/IB : No device found. +gpub093:60690:60748 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.193<0> [1]hsn0:141.142.145.193<0> +gpub093:60690:60748 [0] NCCL INFO Using network Socket +gpub093:60690:60748 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub093:60690:60748 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub093:60690:60748 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/Socket/1 +gpub093:60690:60748 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/Socket/1 +gpub093:60690:60748 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub093:60690:60748 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub093:60690:60748 [0] NCCL INFO Connected all rings +gpub093:60690:60748 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/Socket/1 +gpub093:60690:60748 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/Socket/1 +gpub093:60690:60748 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/Socket/1 +gpub093:60690:60748 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/Socket/1 +gpub093:60690:60748 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/Socket/1 +gpub093:60690:60748 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/Socket/1 +gpub093:60690:60748 [0] NCCL INFO Connected all trees +gpub093:60690:60748 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub093:60690:60748 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub093:60690:60748 [0] NCCL INFO comm 0x55e28b6f7ba0 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub093:60693:60693 [3] NCCL INFO cudaDriverVersion 12020 +gpub093:60693:60693 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.193<0> +gpub093:60693:60693 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub093:60693:60749 [3] NCCL INFO NET/IB : No device found. 
+gpub093:60693:60749 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.193<0> [1]hsn0:141.142.145.193<0> +gpub093:60693:60749 [3] NCCL INFO Using network Socket +gpub093:60693:60749 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub093:60693:60749 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub093:60693:60749 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/Socket/1 +gpub093:60693:60749 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/Socket/1 +gpub093:60693:60749 [3] NCCL INFO Connected all rings +gpub093:60693:60749 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub093:60693:60749 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub093:60693:60749 [3] NCCL INFO Connected all trees +gpub093:60693:60749 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub093:60693:60749 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub093:60693:60749 [3] NCCL INFO comm 0x55961e1a00f0 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub093:60691:60691 [1] NCCL INFO cudaDriverVersion 12020 +gpub093:60691:60691 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.193<0> +gpub093:60691:60691 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub093:60691:60746 [1] NCCL INFO NET/IB : No device found. +gpub093:60691:60746 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.193<0> [1]hsn0:141.142.145.193<0> +gpub093:60691:60746 [1] NCCL INFO Using network Socket +gpub093:60691:60746 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub093:60691:60746 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub093:60691:60746 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub093:60691:60746 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub093:60691:60746 [1] NCCL INFO Connected all rings +gpub093:60691:60746 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/Socket/1 +gpub093:60691:60746 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/Socket/1 +gpub093:60691:60746 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub093:60691:60746 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub093:60691:60746 [1] NCCL INFO Connected all trees +gpub093:60691:60746 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub093:60691:60746 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub093:60691:60746 [1] NCCL INFO comm 0x562d226779c0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub093:60692:60692 [2] NCCL INFO cudaDriverVersion 12020 +gpub093:60692:60692 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.193<0> +gpub093:60692:60692 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub093:60692:60747 [2] NCCL INFO NET/IB : No device found. 
+gpub093:60692:60747 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.193<0> [1]hsn0:141.142.145.193<0> +gpub093:60692:60747 [2] NCCL INFO Using network Socket +gpub093:60692:60747 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub093:60692:60747 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub093:60692:60747 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub093:60692:60747 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub093:60692:60747 [2] NCCL INFO Connected all rings +gpub093:60692:60747 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub093:60692:60747 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub093:60692:60747 [2] NCCL INFO Connected all trees +gpub093:60692:60747 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub093:60692:60747 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub093:60692:60747 [2] NCCL INFO comm 0x5556bca62ac0 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub090:63850:63850 [0] NCCL INFO cudaDriverVersion 12020 +gpub090:63850:63850 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:63850:63850 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:63850:63908 [0] NCCL INFO NET/IB : No device found. +gpub090:63850:63908 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.190<0> [1]hsn0:141.142.145.190<0> [2]eth0:fe80::52fd:9d21:2743:30b0%eth0<0> +gpub090:63850:63908 [0] NCCL INFO Using network Socket +gpub090:63850:63908 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub090:63850:63908 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub090:63850:63908 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/Socket/1 +gpub090:63850:63908 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/Socket/1 +gpub090:63850:63908 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub090:63850:63908 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub090:63850:63908 [0] NCCL INFO Connected all rings +gpub090:63850:63908 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/Socket/1 +gpub090:63850:63908 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/Socket/1 +gpub090:63850:63908 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/Socket/1 +gpub090:63850:63908 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/Socket/1 +gpub090:63850:63908 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/Socket/1 +gpub090:63850:63908 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/Socket/1 +gpub090:63850:63908 [0] NCCL INFO Connected all trees +gpub090:63850:63908 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub090:63850:63908 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:63850:63908 [0] NCCL INFO comm 0x556653b86560 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub090:63852:63852 [2] NCCL INFO cudaDriverVersion 12020 +gpub090:63852:63852 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:63852:63852 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:63852:63909 [2] NCCL INFO NET/IB : No device found. 
+gpub090:63852:63909 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.190<0> [1]hsn0:141.142.145.190<0> [2]eth0:fe80::52fd:9d21:2743:30b0%eth0<0> +gpub090:63852:63909 [2] NCCL INFO Using network Socket +gpub090:63852:63909 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub090:63852:63909 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub090:63852:63909 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub090:63852:63909 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub090:63852:63909 [2] NCCL INFO Connected all rings +gpub090:63852:63909 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub090:63852:63909 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub090:63852:63909 [2] NCCL INFO Connected all trees +gpub090:63852:63909 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub090:63852:63909 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:63852:63909 [2] NCCL INFO comm 0x555d155f72d0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub090:63853:63853 [3] NCCL INFO cudaDriverVersion 12020 +gpub090:63853:63853 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:63853:63853 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:63853:63910 [3] NCCL INFO NET/IB : No device found. +gpub090:63853:63910 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.190<0> [1]hsn0:141.142.145.190<0> [2]eth0:fe80::52fd:9d21:2743:30b0%eth0<0> +gpub090:63853:63910 [3] NCCL INFO Using network Socket +gpub090:63853:63910 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub090:63853:63910 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub090:63853:63910 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/Socket/1 +gpub090:63853:63910 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/Socket/1 +gpub090:63853:63910 [3] NCCL INFO Connected all rings +gpub090:63853:63910 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub090:63853:63910 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub090:63853:63910 [3] NCCL INFO Connected all trees +gpub090:63853:63910 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub090:63853:63910 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:63853:63910 [3] NCCL INFO comm 0x55f5ef0199d0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub090:63851:63851 [1] NCCL INFO cudaDriverVersion 12020 +gpub090:63851:63851 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:63851:63851 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:63851:63911 [1] NCCL INFO NET/IB : No device found. 
+gpub090:63851:63911 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.190<0> [1]hsn0:141.142.145.190<0> [2]eth0:fe80::52fd:9d21:2743:30b0%eth0<0> +gpub090:63851:63911 [1] NCCL INFO Using network Socket +gpub090:63851:63911 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub090:63851:63911 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub090:63851:63911 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub090:63851:63911 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub090:63851:63911 [1] NCCL INFO Connected all rings +gpub090:63851:63911 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/Socket/1 +gpub090:63851:63911 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/Socket/1 +gpub090:63851:63911 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub090:63851:63911 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub090:63851:63911 [1] NCCL INFO Connected all trees +gpub090:63851:63911 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub090:63851:63911 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:63851:63911 [1] NCCL INFO comm 0x558a2c937b10 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub089:61054:61054 [2] NCCL INFO cudaDriverVersion 12020 +gpub089:61054:61054 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.189<0> +gpub089:61054:61054 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub089:61054:61128 [2] NCCL INFO NET/IB : No device found. +gpub089:61054:61128 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.189<0> [1]hsn0:141.142.145.189<0> +gpub089:61054:61128 [2] NCCL INFO Using network Socket +gpub089:61054:61128 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub089:61054:61128 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub089:61054:61128 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub089:61054:61128 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub089:61054:61128 [2] NCCL INFO Connected all rings +gpub089:61054:61128 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub089:61054:61128 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub089:61054:61128 [2] NCCL INFO Connected all trees +gpub089:61054:61128 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub089:61054:61128 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub089:61054:61128 [2] NCCL INFO comm 0x556f92a146b0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub089:61055:61055 [3] NCCL INFO cudaDriverVersion 12020 +gpub089:61055:61055 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.189<0> +gpub089:61055:61055 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub089:61055:61127 [3] NCCL INFO NET/IB : No device found. 
+gpub089:61055:61127 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.189<0> [1]hsn0:141.142.145.189<0> +gpub089:61055:61127 [3] NCCL INFO Using network Socket +gpub089:61055:61127 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub089:61055:61127 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub089:61055:61127 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/Socket/1 +gpub089:61055:61127 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/Socket/1 +gpub089:61055:61127 [3] NCCL INFO Connected all rings +gpub089:61055:61127 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub089:61055:61127 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub089:61055:61127 [3] NCCL INFO Connected all trees +gpub089:61055:61127 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub089:61055:61127 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub089:61055:61127 [3] NCCL INFO comm 0x561cb8084a10 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub089:61052:61052 [0] NCCL INFO cudaDriverVersion 12020 +gpub089:61052:61052 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.189<0> +gpub089:61052:61052 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub089:61052:61129 [0] NCCL INFO NET/IB : No device found. +gpub089:61052:61129 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.189<0> [1]hsn0:141.142.145.189<0> +gpub089:61052:61129 [0] NCCL INFO Using network Socket +gpub089:61052:61129 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub089:61052:61129 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub089:61052:61129 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/Socket/1 +gpub089:61052:61129 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/Socket/1 +gpub089:61052:61129 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub089:61052:61129 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub089:61052:61129 [0] NCCL INFO Connected all rings +gpub089:61052:61129 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/Socket/1 +gpub089:61052:61129 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/Socket/1 +gpub089:61052:61129 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/Socket/1 +gpub089:61052:61129 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/Socket/1 +gpub089:61052:61129 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/Socket/1 +gpub089:61052:61129 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/Socket/1 +gpub089:61052:61129 [0] NCCL INFO Connected all trees +gpub089:61052:61129 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub089:61052:61129 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub089:61052:61129 [0] NCCL INFO comm 0x556c05002e90 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub089:61053:61053 [1] NCCL INFO cudaDriverVersion 12020 +gpub089:61053:61053 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.189<0> +gpub089:61053:61053 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub089:61053:61130 [1] NCCL INFO NET/IB : No device found. 
+gpub089:61053:61130 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.189<0> [1]hsn0:141.142.145.189<0> +gpub089:61053:61130 [1] NCCL INFO Using network Socket +gpub089:61053:61130 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub089:61053:61130 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub089:61053:61130 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub089:61053:61130 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub089:61053:61130 [1] NCCL INFO Connected all rings +gpub089:61053:61130 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/Socket/1 +gpub089:61053:61130 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/Socket/1 +gpub089:61053:61130 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub089:61053:61130 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub089:61053:61130 [1] NCCL INFO Connected all trees +gpub089:61053:61130 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub089:61053:61130 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub089:61053:61130 [1] NCCL INFO comm 0x562223cb68e0 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +[gpub006:0/64] 2024-01-24 22:55:27,793 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. +[gpub006:0/64] 2024-01-24 23:02:31,805 (trainer:753) INFO: 13epoch:train:1-100batch: iter_time=1.149, forward_time=0.333, loss_ctc=103.053, loss_interctc_layer6=100.521, loss_interctc_layer12=85.715, loss_interctc_layer15=80.035, loss_interctc_layer21=105.125, loss=94.890, backward_time=0.814, grad_norm=89.106, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.155e-04, train_time=18.164 +[gpub006:0/64] 2024-01-24 23:11:15,735 (trainer:753) INFO: 13epoch:train:101-200batch: iter_time=9.754e-05, forward_time=0.200, loss_ctc=110.513, loss_interctc_layer6=101.427, loss_interctc_layer12=85.563, loss_interctc_layer15=79.330, loss_interctc_layer21=113.333, loss=98.033, backward_time=1.061, grad_norm=69.764, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.154e-04, train_time=5.241 +[gpub006:0/64] 2024-01-24 23:20:58,337 (trainer:753) INFO: 13epoch:train:201-300batch: iter_time=9.093e-05, forward_time=0.200, loss_ctc=103.954, loss_interctc_layer6=99.720, loss_interctc_layer12=85.951, loss_interctc_layer15=80.359, loss_interctc_layer21=106.396, loss=95.276, backward_time=0.977, grad_norm=67.528, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.154e-04, train_time=5.826 +[gpub006:0/64] 2024-01-24 23:30:39,899 (trainer:753) INFO: 13epoch:train:301-400batch: iter_time=1.025e-04, forward_time=0.200, loss_ctc=100.567, loss_interctc_layer6=105.297, loss_interctc_layer12=88.960, loss_interctc_layer15=82.300, loss_interctc_layer21=102.786, loss=95.982, backward_time=1.040, grad_norm=76.957, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.154e-04, train_time=5.815 +[gpub006:0/64] 2024-01-24 23:38:41,960 (trainer:753) INFO: 13epoch:train:401-500batch: iter_time=9.727e-05, forward_time=0.198, loss_ctc=96.555, loss_interctc_layer6=90.034, loss_interctc_layer12=75.865, loss_interctc_layer15=70.170, loss_interctc_layer21=98.836, loss=86.292, backward_time=0.823, grad_norm=67.405, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.153e-04, train_time=4.820 +[gpub006:0/64] 2024-01-24 23:47:07,250 (trainer:753) INFO: 13epoch:train:501-600batch: 
iter_time=9.606e-05, forward_time=0.230, loss_ctc=96.724, loss_interctc_layer6=92.641, loss_interctc_layer12=78.887, loss_interctc_layer15=73.323, loss_interctc_layer21=99.086, loss=88.132, backward_time=0.873, grad_norm=85.782, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.153e-04, train_time=5.053 +[gpub006:0/64] 2024-01-24 23:55:43,795 (trainer:753) INFO: 13epoch:train:601-700batch: iter_time=9.948e-05, forward_time=0.199, loss_ctc=90.435, loss_interctc_layer6=94.042, loss_interctc_layer12=79.395, loss_interctc_layer15=73.864, loss_interctc_layer21=92.080, loss=85.963, backward_time=1.001, grad_norm=80.664, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.153e-04, train_time=5.165 +[gpub006:0/64] 2024-01-25 00:04:02,965 (trainer:753) INFO: 13epoch:train:701-800batch: iter_time=9.863e-05, forward_time=0.296, loss_ctc=97.225, loss_interctc_layer6=98.287, loss_interctc_layer12=83.173, loss_interctc_layer15=77.563, loss_interctc_layer21=98.873, loss=91.024, backward_time=0.863, grad_norm=69.223, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.152e-04, train_time=4.991 +[gpub006:0/64] 2024-01-25 00:11:55,714 (trainer:753) INFO: 13epoch:train:801-900batch: iter_time=9.422e-05, forward_time=0.199, loss_ctc=101.506, loss_interctc_layer6=97.808, loss_interctc_layer12=83.372, loss_interctc_layer15=77.328, loss_interctc_layer21=103.712, loss=92.745, backward_time=0.804, grad_norm=87.994, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.152e-04, train_time=4.727 +[gpub006:0/64] 2024-01-25 00:19:09,850 (trainer:753) INFO: 13epoch:train:901-1000batch: iter_time=8.822e-05, forward_time=0.198, loss_ctc=104.420, loss_interctc_layer6=107.760, loss_interctc_layer12=92.406, loss_interctc_layer15=86.695, loss_interctc_layer21=106.566, loss=99.570, backward_time=0.718, grad_norm=94.628, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.152e-04, train_time=4.341 +[gpub006:0/64] 2024-01-25 00:26:12,630 (trainer:753) INFO: 13epoch:train:1001-1100batch: iter_time=8.153e-05, forward_time=0.198, loss_ctc=90.963, loss_interctc_layer6=91.424, loss_interctc_layer12=76.715, loss_interctc_layer15=70.776, loss_interctc_layer21=93.243, loss=84.624, backward_time=0.712, grad_norm=66.711, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.151e-04, train_time=4.228 +[gpub006:0/64] 2024-01-25 00:34:57,069 (trainer:753) INFO: 13epoch:train:1101-1200batch: iter_time=8.123e-05, forward_time=0.199, loss_ctc=98.047, loss_interctc_layer6=92.645, loss_interctc_layer12=79.389, loss_interctc_layer15=74.239, loss_interctc_layer21=100.091, loss=88.882, backward_time=0.915, grad_norm=64.977, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.151e-04, train_time=5.244 +[gpub006:0/64] 2024-01-25 00:39:08,491 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
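The per-window trainer records above report a CTC loss plus four intermediate-CTC losses (encoder layers 6, 12, 15 and 21) and an overall loss. For the windows logged here, the overall loss agrees with the unweighted mean of those five terms; a quick check against the first window, using only the numbers printed above rather than the actual training config:

    # Reported for 13epoch:train:1-100batch:
    # loss_ctc=103.053, interctc layers 6/12/15/21 = 100.521, 85.715, 80.035, 105.125
    vals = [103.053, 100.521, 85.715, 80.035, 105.125]
    print(sum(vals) / len(vals))   # 94.8898 -> the log reports loss=94.890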
+[gpub006:0/64] 2024-01-25 00:39:27,300 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 00:39:30,879 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 00:39:30,879 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub006:0/64] 2024-01-25 00:39:30,882 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 00:53:14,488 (trainer:753) INFO: 13epoch:train:1201-1300batch: iter_time=6.217, forward_time=0.272, loss_ctc=101.192, loss_interctc_layer6=98.794, loss_interctc_layer12=83.319, loss_interctc_layer15=77.188, loss_interctc_layer21=103.626, loss=92.824, backward_time=0.719, grad_norm=72.377, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.229, optim0_lr0=1.151e-04, train_time=10.973 +[gpub006:0/64] 2024-01-25 00:58:59,292 (trainer:753) INFO: 13epoch:train:1301-1400batch: iter_time=8.768e-05, forward_time=0.204, loss_ctc=98.224, loss_interctc_layer6=99.650, loss_interctc_layer12=83.742, loss_interctc_layer15=77.682, loss_interctc_layer21=100.477, loss=91.955, backward_time=0.602, grad_norm=76.911, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.150e-04, train_time=3.449 +[gpub006:0/64] 2024-01-25 01:05:38,046 (trainer:753) INFO: 13epoch:train:1401-1500batch: iter_time=9.438e-05, forward_time=0.201, loss_ctc=99.680, loss_interctc_layer6=96.095, loss_interctc_layer12=80.965, loss_interctc_layer15=75.433, loss_interctc_layer21=102.268, loss=90.888, backward_time=0.708, grad_norm=109.313, clip=100.000, loss_scale=2.089e+31, optim_step_time=0.224, optim0_lr0=1.150e-04, train_time=3.987 +[gpub006:0/64] 2024-01-25 01:12:13,318 (trainer:753) INFO: 13epoch:train:1501-1600batch: iter_time=9.924e-05, forward_time=0.202, loss_ctc=100.098, loss_interctc_layer6=109.512, loss_interctc_layer12=92.938, loss_interctc_layer15=86.786, loss_interctc_layer21=102.371, loss=98.341, backward_time=0.638, grad_norm=81.047, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.150e-04, train_time=3.953 +[gpub006:0/64] 2024-01-25 01:19:46,413 (trainer:753) INFO: 13epoch:train:1601-1700batch: iter_time=8.685e-05, forward_time=0.200, loss_ctc=89.706, loss_interctc_layer6=92.460, loss_interctc_layer12=77.487, loss_interctc_layer15=71.617, loss_interctc_layer21=91.798, loss=84.614, backward_time=0.816, grad_norm=69.485, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.149e-04, train_time=4.531 +[gpub006:0/64] 2024-01-25 01:26:12,989 (trainer:753) INFO: 13epoch:train:1701-1800batch: iter_time=8.960e-05, forward_time=0.201, loss_ctc=82.678, loss_interctc_layer6=87.001, loss_interctc_layer12=73.043, loss_interctc_layer15=67.513, loss_interctc_layer21=84.767, loss=79.000, backward_time=0.701, grad_norm=68.234, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.149e-04, train_time=3.866 +[gpub006:0/64] 2024-01-25 01:32:50,131 (trainer:753) INFO: 
13epoch:train:1801-1900batch: iter_time=8.529e-05, forward_time=0.201, loss_ctc=93.207, loss_interctc_layer6=93.830, loss_interctc_layer12=79.504, loss_interctc_layer15=74.044, loss_interctc_layer21=95.544, loss=87.226, backward_time=0.853, grad_norm=67.959, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.149e-04, train_time=3.971 +[gpub006:0/64] 2024-01-25 01:38:22,284 (trainer:753) INFO: 13epoch:train:1901-2000batch: iter_time=8.558e-05, forward_time=0.201, loss_ctc=92.331, loss_interctc_layer6=99.883, loss_interctc_layer12=84.660, loss_interctc_layer15=78.830, loss_interctc_layer21=94.330, loss=90.007, backward_time=0.551, grad_norm=88.056, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.148e-04, train_time=3.321 +[gpub006:0/64] 2024-01-25 01:43:41,961 (trainer:753) INFO: 13epoch:train:2001-2100batch: iter_time=8.745e-05, forward_time=0.201, loss_ctc=87.523, loss_interctc_layer6=92.738, loss_interctc_layer12=77.867, loss_interctc_layer15=72.311, loss_interctc_layer21=89.372, loss=83.962, backward_time=0.570, grad_norm=67.794, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.148e-04, train_time=3.197 +[gpub006:0/64] 2024-01-25 01:45:33,653 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub006:0/64] 2024-01-25 01:49:40,152 (trainer:753) INFO: 13epoch:train:2101-2200batch: iter_time=8.997e-05, forward_time=0.202, loss_ctc=95.849, loss_interctc_layer6=97.298, loss_interctc_layer12=82.209, loss_interctc_layer15=76.375, loss_interctc_layer21=98.026, loss=89.951, backward_time=0.556, grad_norm=98.523, clip=100.000, loss_scale=2.684e+31, optim_step_time=0.224, optim0_lr0=1.148e-04, train_time=3.582 +[gpub006:0/64] 2024-01-25 01:55:36,743 (trainer:753) INFO: 13epoch:train:2201-2300batch: iter_time=9.313e-05, forward_time=0.217, loss_ctc=109.422, loss_interctc_layer6=111.481, loss_interctc_layer12=93.801, loss_interctc_layer15=87.104, loss_interctc_layer21=111.449, loss=102.651, backward_time=0.589, grad_norm=107.313, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.148e-04, train_time=3.566 +[gpub006:0/64] 2024-01-25 02:01:47,351 (trainer:753) INFO: 13epoch:train:2301-2400batch: iter_time=8.663e-05, forward_time=0.221, loss_ctc=80.847, loss_interctc_layer6=86.521, loss_interctc_layer12=73.425, loss_interctc_layer15=68.449, loss_interctc_layer21=82.669, loss=78.382, backward_time=0.627, grad_norm=65.492, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.147e-04, train_time=3.706 +[gpub006:0/64] 2024-01-25 02:08:13,721 (trainer:753) INFO: 13epoch:train:2401-2500batch: iter_time=8.249e-05, forward_time=0.204, loss_ctc=89.295, loss_interctc_layer6=91.845, loss_interctc_layer12=77.445, loss_interctc_layer15=71.667, loss_interctc_layer21=91.218, loss=84.294, backward_time=0.624, grad_norm=70.794, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.147e-04, train_time=3.863 +[gpub006:0/64] 2024-01-25 02:08:21,919 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
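The WARNING at 01:45:33 above ("The grad norm is nan. Skipping updating the model.") is followed by the reported loss_scale falling from 4.056e+31 to 2.684e+31 and then 2.028e+31 over the next windows, which is the pattern expected from dynamic loss scaling under mixed precision: the offending step is skipped and the scale is backed off. The scaler settings actually used for this run are not shown in the log; a minimal PyTorch sketch of that behaviour, with hypothetical values, is:

    import torch

    # Hypothetical settings; the real init_scale/backoff for this run are not in the log.
    scaler = torch.cuda.amp.GradScaler(init_scale=2.0**16, backoff_factor=0.5, growth_interval=2000)
    # scaler.step(optimizer) skips the parameter update when inf/nan gradients are found,
    # and the following scaler.update() multiplies the scale by backoff_factor.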
+[gpub006:0/64] 2024-01-25 02:08:40,767 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 02:08:44,218 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 02:08:44,218 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub006:0/64] 2024-01-25 02:08:44,329 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 02:23:46,477 (trainer:753) INFO: 13epoch:train:2501-2600batch: iter_time=1.890, forward_time=0.232, loss_ctc=102.726, loss_interctc_layer6=98.981, loss_interctc_layer12=84.099, loss_interctc_layer15=78.433, loss_interctc_layer21=105.058, loss=93.860, backward_time=0.529, grad_norm=72.169, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.147e-04, train_time=9.326 +[gpub006:0/64] 2024-01-25 02:31:11,152 (trainer:753) INFO: 13epoch:train:2601-2700batch: iter_time=8.778e-05, forward_time=0.203, loss_ctc=109.858, loss_interctc_layer6=100.236, loss_interctc_layer12=83.987, loss_interctc_layer15=77.738, loss_interctc_layer21=112.396, loss=96.843, backward_time=0.752, grad_norm=108.949, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.146e-04, train_time=4.447 +[gpub006:0/64] 2024-01-25 02:39:00,573 (trainer:753) INFO: 13epoch:train:2701-2800batch: iter_time=8.910e-05, forward_time=0.202, loss_ctc=100.850, loss_interctc_layer6=97.864, loss_interctc_layer12=83.815, loss_interctc_layer15=78.222, loss_interctc_layer21=103.246, loss=92.799, backward_time=0.760, grad_norm=76.279, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.146e-04, train_time=4.694 +[gpub006:0/64] 2024-01-25 02:46:40,026 (trainer:753) INFO: 13epoch:train:2801-2900batch: iter_time=8.642e-05, forward_time=0.201, loss_ctc=99.632, loss_interctc_layer6=104.282, loss_interctc_layer12=87.594, loss_interctc_layer15=81.049, loss_interctc_layer21=101.864, loss=94.884, backward_time=0.734, grad_norm=93.965, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.146e-04, train_time=4.594 +[gpub006:0/64] 2024-01-25 02:54:10,761 (trainer:753) INFO: 13epoch:train:2901-3000batch: iter_time=8.707e-05, forward_time=0.201, loss_ctc=95.269, loss_interctc_layer6=89.995, loss_interctc_layer12=75.410, loss_interctc_layer15=69.671, loss_interctc_layer21=97.662, loss=85.601, backward_time=0.700, grad_norm=69.468, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.145e-04, train_time=4.507 +[gpub006:0/64] 2024-01-25 02:56:27,271 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub006:0/64] 2024-01-25 03:00:47,959 (trainer:753) INFO: 13epoch:train:3001-3100batch: iter_time=8.714e-05, forward_time=0.201, loss_ctc=94.488, loss_interctc_layer6=91.045, loss_interctc_layer12=76.635, loss_interctc_layer15=71.192, loss_interctc_layer21=96.917, loss=86.055, backward_time=0.658, grad_norm=100.113, clip=100.000, loss_scale=1.342e+31, optim_step_time=0.224, optim0_lr0=1.145e-04, train_time=3.972 +[gpub006:0/64] 2024-01-25 03:07:48,752 (trainer:753) INFO: 13epoch:train:3101-3200batch: iter_time=8.941e-05, forward_time=0.201, loss_ctc=89.105, loss_interctc_layer6=92.677, loss_interctc_layer12=78.235, loss_interctc_layer15=72.576, loss_interctc_layer21=91.073, loss=84.733, backward_time=0.690, grad_norm=80.658, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.145e-04, train_time=4.208 +[gpub006:0/64] 2024-01-25 03:15:33,692 (trainer:753) INFO: 13epoch:train:3201-3300batch: iter_time=8.767e-05, forward_time=0.201, loss_ctc=96.287, loss_interctc_layer6=97.484, loss_interctc_layer12=82.131, loss_interctc_layer15=76.157, loss_interctc_layer21=98.230, loss=90.058, backward_time=0.832, grad_norm=72.633, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.144e-04, train_time=4.649 +[gpub006:0/64] 2024-01-25 03:22:44,625 (trainer:753) INFO: 13epoch:train:3301-3400batch: iter_time=9.015e-05, forward_time=0.209, loss_ctc=100.348, loss_interctc_layer6=97.560, loss_interctc_layer12=82.294, loss_interctc_layer15=76.387, loss_interctc_layer21=102.461, loss=91.810, backward_time=0.654, grad_norm=101.311, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.144e-04, train_time=4.309 +[gpub006:0/64] 2024-01-25 03:30:13,594 (trainer:753) INFO: 13epoch:train:3401-3500batch: iter_time=8.809e-05, forward_time=0.260, loss_ctc=98.819, loss_interctc_layer6=104.007, loss_interctc_layer12=88.118, loss_interctc_layer15=81.587, loss_interctc_layer21=101.560, loss=94.818, backward_time=0.663, grad_norm=96.843, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=1.144e-04, train_time=4.489 +[gpub006:0/64] 2024-01-25 03:37:30,670 (trainer:753) INFO: 13epoch:train:3501-3600batch: iter_time=9.142e-05, forward_time=0.201, loss_ctc=89.885, loss_interctc_layer6=90.603, loss_interctc_layer12=75.603, loss_interctc_layer15=69.682, loss_interctc_layer21=92.013, loss=83.557, backward_time=0.746, grad_norm=80.519, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.143e-04, train_time=4.371 +[gpub006:0/64] 2024-01-25 03:44:08,283 (trainer:753) INFO: 13epoch:train:3601-3700batch: iter_time=8.712e-05, forward_time=0.201, loss_ctc=94.593, loss_interctc_layer6=89.665, loss_interctc_layer12=76.209, loss_interctc_layer15=70.839, loss_interctc_layer21=96.901, loss=85.642, backward_time=0.664, grad_norm=82.933, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.143e-04, train_time=3.976 +[gpub006:0/64] 2024-01-25 03:47:47,768 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
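These trainer records are regular enough to pull learning curves from directly. A throwaway parsing sketch, assuming only the record format visible above ("train.log" is a placeholder path):

    import re

    # Matches e.g. "13epoch:train:3601-3700batch: ... loss=85.642, ... train_time=3.976"
    pattern = re.compile(r"(\d+)epoch:train:(\d+)-(\d+)batch:.*?loss=([\d.]+).*?train_time=([\d.]+)")

    with open("train.log") as f:          # placeholder path
        for line in f:
            m = pattern.search(line)
            if m:
                epoch, first, last, loss, train_time = m.groups()
                print(epoch, first, last, loss, train_time)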
+[gpub006:0/64] 2024-01-25 03:48:06,327 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 03:48:09,762 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 03:48:09,762 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub006:0/64] 2024-01-25 03:48:09,850 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 04:00:36,393 (trainer:753) INFO: 13epoch:train:3701-3800batch: iter_time=2.447, forward_time=0.228, loss_ctc=99.527, loss_interctc_layer6=97.060, loss_interctc_layer12=81.626, loss_interctc_layer15=75.937, loss_interctc_layer21=101.984, loss=91.227, backward_time=0.571, grad_norm=93.481, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.143e-04, train_time=9.881 +[gpub006:0/64] 2024-01-25 04:06:37,542 (trainer:753) INFO: 13epoch:train:3801-3900batch: iter_time=8.326e-05, forward_time=0.202, loss_ctc=97.856, loss_interctc_layer6=98.576, loss_interctc_layer12=82.988, loss_interctc_layer15=77.086, loss_interctc_layer21=100.236, loss=91.348, backward_time=0.634, grad_norm=76.364, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.143e-04, train_time=3.611 +[gpub006:0/64] 2024-01-25 04:12:30,178 (trainer:753) INFO: 13epoch:train:3901-4000batch: iter_time=8.657e-05, forward_time=0.203, loss_ctc=98.702, loss_interctc_layer6=94.450, loss_interctc_layer12=79.402, loss_interctc_layer15=73.793, loss_interctc_layer21=100.657, loss=89.401, backward_time=0.652, grad_norm=80.822, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.142e-04, train_time=3.526 +[gpub006:0/64] 2024-01-25 04:19:12,987 (trainer:753) INFO: 13epoch:train:4001-4100batch: iter_time=9.259e-05, forward_time=0.203, loss_ctc=99.987, loss_interctc_layer6=109.645, loss_interctc_layer12=93.191, loss_interctc_layer15=86.882, loss_interctc_layer21=102.091, loss=98.359, backward_time=0.690, grad_norm=73.983, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.142e-04, train_time=4.028 +[gpub006:0/64] 2024-01-25 04:25:17,081 (trainer:753) INFO: 13epoch:train:4101-4200batch: iter_time=8.601e-05, forward_time=0.201, loss_ctc=88.182, loss_interctc_layer6=91.233, loss_interctc_layer12=76.319, loss_interctc_layer15=70.376, loss_interctc_layer21=90.440, loss=83.310, backward_time=0.632, grad_norm=82.750, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.142e-04, train_time=3.641 +[gpub006:0/64] 2024-01-25 04:32:03,736 (trainer:753) INFO: 13epoch:train:4201-4300batch: iter_time=8.679e-05, forward_time=0.200, loss_ctc=82.755, loss_interctc_layer6=86.761, loss_interctc_layer12=72.652, loss_interctc_layer15=67.221, loss_interctc_layer21=84.061, loss=78.690, backward_time=0.731, grad_norm=88.775, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.141e-04, train_time=4.066 +[gpub006:0/64] 2024-01-25 04:37:50,187 (trainer:753) INFO: 
13epoch:train:4301-4400batch: iter_time=8.958e-05, forward_time=0.201, loss_ctc=92.122, loss_interctc_layer6=93.306, loss_interctc_layer12=78.703, loss_interctc_layer15=73.197, loss_interctc_layer21=94.062, loss=86.278, backward_time=0.561, grad_norm=130.442, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.141e-04, train_time=3.464 +[gpub006:0/64] 2024-01-25 04:44:06,233 (trainer:753) INFO: 13epoch:train:4401-4500batch: iter_time=8.720e-05, forward_time=0.202, loss_ctc=90.715, loss_interctc_layer6=99.580, loss_interctc_layer12=83.868, loss_interctc_layer15=77.766, loss_interctc_layer21=92.795, loss=88.945, backward_time=0.679, grad_norm=97.598, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.141e-04, train_time=3.760 +[gpub006:0/64] 2024-01-25 04:50:23,464 (trainer:753) INFO: 13epoch:train:4501-4600batch: iter_time=8.935e-05, forward_time=0.204, loss_ctc=85.840, loss_interctc_layer6=91.347, loss_interctc_layer12=76.594, loss_interctc_layer15=70.859, loss_interctc_layer21=87.546, loss=82.437, backward_time=0.708, grad_norm=75.982, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.140e-04, train_time=3.772 +[gpub006:0/64] 2024-01-25 04:56:38,580 (trainer:753) INFO: 13epoch:train:4601-4700batch: iter_time=9.505e-05, forward_time=0.203, loss_ctc=94.944, loss_interctc_layer6=96.489, loss_interctc_layer12=81.356, loss_interctc_layer15=75.419, loss_interctc_layer21=96.861, loss=89.014, backward_time=0.646, grad_norm=66.674, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.140e-04, train_time=3.751 +[gpub006:0/64] 2024-01-25 05:02:45,098 (trainer:753) INFO: 13epoch:train:4701-4800batch: iter_time=8.476e-05, forward_time=0.263, loss_ctc=104.580, loss_interctc_layer6=106.372, loss_interctc_layer12=89.461, loss_interctc_layer15=83.528, loss_interctc_layer21=107.141, loss=98.216, backward_time=0.597, grad_norm=85.762, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.230, optim0_lr0=1.140e-04, train_time=3.665 +[gpub006:0/64] 2024-01-25 05:08:55,395 (trainer:753) INFO: 13epoch:train:4801-4900batch: iter_time=8.619e-05, forward_time=0.200, loss_ctc=80.011, loss_interctc_layer6=86.484, loss_interctc_layer12=72.695, loss_interctc_layer15=67.738, loss_interctc_layer21=82.546, loss=77.895, backward_time=0.566, grad_norm=77.163, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.139e-04, train_time=3.703 +[gpub006:0/64] 2024-01-25 05:15:06,012 (trainer:753) INFO: 13epoch:train:4901-5000batch: iter_time=8.475e-05, forward_time=0.201, loss_ctc=89.030, loss_interctc_layer6=91.715, loss_interctc_layer12=77.134, loss_interctc_layer15=71.334, loss_interctc_layer21=91.164, loss=84.076, backward_time=0.589, grad_norm=85.395, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.139e-04, train_time=3.706 +[gpub006:0/64] 2024-01-25 05:15:17,247 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub006:0/64] 2024-01-25 05:15:35,668 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 05:15:39,291 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 05:15:39,291 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub006:0/64] 2024-01-25 05:15:39,294 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 05:31:33,128 (trainer:753) INFO: 13epoch:train:5001-5100batch: iter_time=1.831, forward_time=0.235, loss_ctc=101.829, loss_interctc_layer6=98.559, loss_interctc_layer12=83.399, loss_interctc_layer15=77.611, loss_interctc_layer21=103.877, loss=93.055, backward_time=0.572, grad_norm=81.986, clip=100.000, loss_scale=1.694e+31, optim_step_time=0.225, optim0_lr0=1.139e-04, train_time=9.870 +[gpub006:0/64] 2024-01-25 05:38:30,377 (trainer:753) INFO: 13epoch:train:5101-5200batch: iter_time=8.307e-05, forward_time=0.204, loss_ctc=109.384, loss_interctc_layer6=99.805, loss_interctc_layer12=83.538, loss_interctc_layer15=77.187, loss_interctc_layer21=112.256, loss=96.434, backward_time=0.618, grad_norm=83.985, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.139e-04, train_time=4.174 +[gpub006:0/64] 2024-01-25 05:44:19,192 (trainer:753) INFO: 13epoch:train:5201-5300batch: iter_time=8.198e-05, forward_time=0.202, loss_ctc=102.310, loss_interctc_layer6=96.445, loss_interctc_layer12=81.816, loss_interctc_layer15=76.234, loss_interctc_layer21=105.086, loss=92.378, backward_time=0.528, grad_norm=88.622, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.138e-04, train_time=3.488 +[gpub006:0/64] 2024-01-25 05:50:36,055 (trainer:753) INFO: 13epoch:train:5301-5400batch: iter_time=8.419e-05, forward_time=0.201, loss_ctc=99.461, loss_interctc_layer6=103.964, loss_interctc_layer12=87.491, loss_interctc_layer15=80.937, loss_interctc_layer21=101.606, loss=94.692, backward_time=0.645, grad_norm=91.923, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.138e-04, train_time=3.768 +[gpub006:0/64] 2024-01-25 05:56:09,005 (trainer:753) INFO: 13epoch:train:5401-5500batch: iter_time=8.162e-05, forward_time=0.202, loss_ctc=96.371, loss_interctc_layer6=89.457, loss_interctc_layer12=74.936, loss_interctc_layer15=69.332, loss_interctc_layer21=98.747, loss=85.769, backward_time=0.542, grad_norm=87.795, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.138e-04, train_time=3.329 +[gpub006:0/64] 2024-01-25 06:02:42,051 (trainer:753) INFO: 13epoch:train:5501-5600batch: iter_time=8.513e-05, forward_time=0.201, loss_ctc=93.794, loss_interctc_layer6=91.088, loss_interctc_layer12=76.633, loss_interctc_layer15=71.162, loss_interctc_layer21=96.495, loss=85.834, backward_time=0.682, grad_norm=100.301, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.137e-04, train_time=3.930 +[gpub006:0/64] 2024-01-25 06:08:34,112 (trainer:753) INFO: 
13epoch:train:5601-5700batch: iter_time=8.256e-05, forward_time=0.201, loss_ctc=88.944, loss_interctc_layer6=92.656, loss_interctc_layer12=78.249, loss_interctc_layer15=72.434, loss_interctc_layer21=91.183, loss=84.693, backward_time=0.524, grad_norm=86.106, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.137e-04, train_time=3.520 +[gpub006:0/64] 2024-01-25 06:14:49,137 (trainer:753) INFO: 13epoch:train:5701-5800batch: iter_time=8.488e-05, forward_time=0.311, loss_ctc=95.587, loss_interctc_layer6=97.414, loss_interctc_layer12=81.914, loss_interctc_layer15=75.719, loss_interctc_layer21=97.704, loss=89.668, backward_time=0.619, grad_norm=83.804, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.229, optim0_lr0=1.137e-04, train_time=3.750 +[gpub006:0/64] 2024-01-25 06:21:14,032 (trainer:753) INFO: 13epoch:train:5801-5900batch: iter_time=8.370e-05, forward_time=0.202, loss_ctc=99.274, loss_interctc_layer6=96.932, loss_interctc_layer12=81.564, loss_interctc_layer15=75.385, loss_interctc_layer21=101.412, loss=90.913, backward_time=0.584, grad_norm=130.976, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.136e-04, train_time=3.849 +[gpub006:0/64] 2024-01-25 06:27:20,028 (trainer:753) INFO: 13epoch:train:5901-6000batch: iter_time=8.702e-05, forward_time=0.201, loss_ctc=98.008, loss_interctc_layer6=102.406, loss_interctc_layer12=86.723, loss_interctc_layer15=80.518, loss_interctc_layer21=100.291, loss=93.589, backward_time=0.597, grad_norm=94.547, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.136e-04, train_time=3.660 +[gpub006:0/64] 2024-01-25 06:33:14,515 (trainer:753) INFO: 13epoch:train:6001-6100batch: iter_time=8.208e-05, forward_time=0.251, loss_ctc=88.536, loss_interctc_layer6=89.227, loss_interctc_layer12=74.531, loss_interctc_layer15=68.670, loss_interctc_layer21=90.686, loss=82.330, backward_time=0.593, grad_norm=71.261, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.136e-04, train_time=3.544 +[gpub006:0/64] 2024-01-25 06:38:45,684 (trainer:753) INFO: 13epoch:train:6101-6200batch: iter_time=8.130e-05, forward_time=0.209, loss_ctc=93.979, loss_interctc_layer6=88.906, loss_interctc_layer12=75.219, loss_interctc_layer15=69.909, loss_interctc_layer21=95.966, loss=84.796, backward_time=0.508, grad_norm=82.575, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.135e-04, train_time=3.312 +[gpub006:0/64] 2024-01-25 06:42:16,572 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub006:0/64] 2024-01-25 06:42:34,813 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 06:42:38,215 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 06:42:38,215 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub006:0/64] 2024-01-25 06:42:38,241 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 06:55:34,909 (trainer:753) INFO: 13epoch:train:6201-6300batch: iter_time=3.369, forward_time=0.202, loss_ctc=99.987, loss_interctc_layer6=96.671, loss_interctc_layer12=81.124, loss_interctc_layer15=75.031, loss_interctc_layer21=102.341, loss=91.031, backward_time=0.533, grad_norm=80.152, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.135e-04, train_time=10.092 +[gpub006:0/64] 2024-01-25 07:01:59,709 (trainer:753) INFO: 13epoch:train:6301-6400batch: iter_time=8.386e-05, forward_time=0.202, loss_ctc=100.912, loss_interctc_layer6=98.410, loss_interctc_layer12=82.905, loss_interctc_layer15=76.832, loss_interctc_layer21=103.195, loss=92.451, backward_time=0.671, grad_norm=114.469, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.135e-04, train_time=3.848 +[gpub006:0/64] 2024-01-25 07:07:19,552 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. 
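[editor's note] The "grad norm is nan. Skipping updating the model." warnings, together with the loss_scale values that drop after such events and grow back later, are the signature of dynamic loss scaling under mixed precision. The sketch below is an assumption about the surrounding training loop, not ESPnet's actual trainer code; it only reproduces the skip-on-nonfinite-gradient pattern with PyTorch's torch.cuda.amp.GradScaler:

# Minimal sketch (assumption, not the ESPnet trainer): a nan gradient norm leads to a
# skipped optimizer step and a reduced loss scale. Assumes a CUDA device, as in the run above.
import torch

model = torch.nn.Linear(80, 50000).to("cuda")       # stand-in model, not the real encoder
optimizer = torch.optim.AdamW(model.parameters(), lr=1.1e-4)
scaler = torch.cuda.amp.GradScaler()                # defaults: backoff_factor=0.5, growth_interval=2000

def train_step(feats, targets):
    optimizer.zero_grad(set_to_none=True)
    with torch.cuda.amp.autocast():
        loss = torch.nn.functional.cross_entropy(model(feats), targets)  # placeholder loss
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)                      # so the clip threshold is in unscaled units
    # max_norm=100.0 assumes the clip=100.000 field above is the configured max gradient norm.
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
    if not torch.isfinite(grad_norm):
        # Corresponds to the WARNING above: parameters are left untouched for this batch.
        print("The grad norm is nan. Skipping updating the model.")
    else:
        scaler.step(optimizer)
    scaler.update()   # backs the scale off if inf/nan was seen during unscale_, otherwise may grow it
    return float(loss), scaler.get_scale()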
+[gpub006:0/64] 2024-01-25 07:08:05,783 (trainer:753) INFO: 13epoch:train:6401-6500batch: iter_time=8.861e-05, forward_time=0.203, loss_ctc=102.154, loss_interctc_layer6=94.894, loss_interctc_layer12=79.704, loss_interctc_layer15=73.762, loss_interctc_layer21=104.650, loss=91.033, backward_time=0.619, grad_norm=102.407, clip=100.000, loss_scale=1.885e+31, optim_step_time=0.224, optim0_lr0=1.135e-04, train_time=3.661 +[gpub006:0/64] 2024-01-25 07:15:06,703 (trainer:753) INFO: 13epoch:train:6501-6600batch: iter_time=8.849e-05, forward_time=0.281, loss_ctc=104.055, loss_interctc_layer6=108.516, loss_interctc_layer12=92.182, loss_interctc_layer15=85.637, loss_interctc_layer21=106.412, loss=99.360, backward_time=0.759, grad_norm=101.705, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.230, optim0_lr0=1.134e-04, train_time=4.209 +[gpub006:0/64] 2024-01-25 07:22:36,971 (trainer:753) INFO: 13epoch:train:6601-6700batch: iter_time=1.014e-04, forward_time=0.201, loss_ctc=94.218, loss_interctc_layer6=90.718, loss_interctc_layer12=76.010, loss_interctc_layer15=69.909, loss_interctc_layer21=96.694, loss=85.510, backward_time=0.747, grad_norm=87.561, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.134e-04, train_time=4.502 +[gpub006:0/64] 2024-01-25 07:28:43,236 (trainer:753) INFO: 13epoch:train:6701-6800batch: iter_time=9.401e-05, forward_time=0.200, loss_ctc=87.796, loss_interctc_layer6=86.001, loss_interctc_layer12=71.878, loss_interctc_layer15=66.242, loss_interctc_layer21=89.695, loss=80.323, backward_time=0.622, grad_norm=74.383, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.134e-04, train_time=3.662 +[gpub006:0/64] 2024-01-25 07:35:03,063 (trainer:753) INFO: 13epoch:train:6801-6900batch: iter_time=8.976e-05, forward_time=0.201, loss_ctc=97.618, loss_interctc_layer6=93.572, loss_interctc_layer12=78.776, loss_interctc_layer15=73.044, loss_interctc_layer21=99.816, loss=88.565, backward_time=0.641, grad_norm=83.176, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.133e-04, train_time=3.798 +[gpub006:0/64] 2024-01-25 07:41:29,433 (trainer:753) INFO: 13epoch:train:6901-7000batch: iter_time=8.321e-05, forward_time=0.203, loss_ctc=95.411, loss_interctc_layer6=99.308, loss_interctc_layer12=83.897, loss_interctc_layer15=77.678, loss_interctc_layer21=97.689, loss=90.797, backward_time=0.684, grad_norm=85.922, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.133e-04, train_time=3.863 +[gpub006:0/64] 2024-01-25 07:47:35,146 (trainer:753) INFO: 13epoch:train:7001-7100batch: iter_time=9.893e-05, forward_time=0.202, loss_ctc=88.273, loss_interctc_layer6=91.590, loss_interctc_layer12=76.742, loss_interctc_layer15=70.959, loss_interctc_layer21=90.141, loss=83.541, backward_time=0.630, grad_norm=89.689, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.133e-04, train_time=3.657 +[gpub006:0/64] 2024-01-25 07:54:30,333 (trainer:753) INFO: 13epoch:train:7101-7200batch: iter_time=1.062e-04, forward_time=0.213, loss_ctc=98.055, loss_interctc_layer6=95.955, loss_interctc_layer12=80.787, loss_interctc_layer15=74.979, loss_interctc_layer21=100.146, loss=89.985, backward_time=0.634, grad_norm=92.841, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.132e-04, train_time=4.151 +[gpub006:0/64] 2024-01-25 08:00:51,890 (trainer:753) INFO: 13epoch:train:7201-7300batch: iter_time=9.767e-05, forward_time=0.201, loss_ctc=107.837, loss_interctc_layer6=106.072, loss_interctc_layer12=89.213, 
loss_interctc_layer15=82.981, loss_interctc_layer21=110.648, loss=99.350, backward_time=0.661, grad_norm=275.347, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.132e-04, train_time=3.815 +[gpub006:0/64] 2024-01-25 08:07:12,387 (trainer:753) INFO: 13epoch:train:7301-7400batch: iter_time=9.801e-05, forward_time=0.200, loss_ctc=82.598, loss_interctc_layer6=85.066, loss_interctc_layer12=71.148, loss_interctc_layer15=65.907, loss_interctc_layer21=85.004, loss=77.944, backward_time=0.606, grad_norm=61.370, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.132e-04, train_time=3.806 +[gpub006:0/64] 2024-01-25 08:13:28,355 (trainer:753) INFO: 13epoch:train:7401-7500batch: iter_time=8.672e-05, forward_time=0.201, loss_ctc=90.522, loss_interctc_layer6=91.182, loss_interctc_layer12=76.618, loss_interctc_layer15=71.120, loss_interctc_layer21=92.658, loss=84.420, backward_time=0.584, grad_norm=73.934, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.132e-04, train_time=3.759 +[gpub006:0/64] 2024-01-25 08:13:42,186 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub006:0/64] 2024-01-25 08:14:01,052 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 08:14:04,766 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 08:14:04,766 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub006:0/64] 2024-01-25 08:14:04,769 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 08:30:45,034 (trainer:753) INFO: 13epoch:train:7501-7600batch: iter_time=3.292, forward_time=0.236, loss_ctc=97.669, loss_interctc_layer6=98.176, loss_interctc_layer12=82.975, loss_interctc_layer15=77.106, loss_interctc_layer21=99.850, loss=91.155, backward_time=0.716, grad_norm=89.453, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.131e-04, train_time=10.367 +[gpub006:0/64] 2024-01-25 08:37:25,836 (trainer:753) INFO: 13epoch:train:7601-7700batch: iter_time=8.308e-05, forward_time=0.203, loss_ctc=104.480, loss_interctc_layer6=99.315, loss_interctc_layer12=83.041, loss_interctc_layer15=76.698, loss_interctc_layer21=107.314, loss=94.170, backward_time=0.631, grad_norm=80.031, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.131e-04, train_time=4.008 +[gpub006:0/64] 2024-01-25 08:44:14,787 (trainer:753) INFO: 13epoch:train:7701-7800batch: iter_time=8.890e-05, forward_time=0.202, loss_ctc=95.788, loss_interctc_layer6=95.973, loss_interctc_layer12=81.480, loss_interctc_layer15=75.959, loss_interctc_layer21=98.128, loss=89.466, backward_time=0.672, grad_norm=94.456, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=1.131e-04, train_time=4.089 +[gpub006:0/64] 2024-01-25 08:50:47,782 (trainer:753) INFO: 13epoch:train:7801-7900batch: iter_time=9.093e-05, forward_time=0.202, loss_ctc=94.542, loss_interctc_layer6=104.238, 
loss_interctc_layer12=87.665, loss_interctc_layer15=81.010, loss_interctc_layer21=96.675, loss=92.826, backward_time=0.664, grad_norm=73.604, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.130e-04, train_time=3.930 +[gpub006:0/64] 2024-01-25 08:57:37,763 (trainer:753) INFO: 13epoch:train:7901-8000batch: iter_time=9.192e-05, forward_time=0.203, loss_ctc=85.378, loss_interctc_layer6=88.441, loss_interctc_layer12=73.910, loss_interctc_layer15=68.035, loss_interctc_layer21=87.412, loss=80.635, backward_time=0.689, grad_norm=80.857, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.130e-04, train_time=4.100 +[gpub006:0/64] 2024-01-25 09:04:14,647 (trainer:753) INFO: 13epoch:train:8001-8100batch: iter_time=9.337e-05, forward_time=0.201, loss_ctc=88.061, loss_interctc_layer6=89.929, loss_interctc_layer12=75.586, loss_interctc_layer15=69.754, loss_interctc_layer21=90.597, loss=82.786, backward_time=0.652, grad_norm=79.227, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.130e-04, train_time=3.969 +[gpub006:0/64] 2024-01-25 09:11:06,793 (trainer:753) INFO: 13epoch:train:8101-8200batch: iter_time=8.820e-05, forward_time=0.205, loss_ctc=84.214, loss_interctc_layer6=92.369, loss_interctc_layer12=77.901, loss_interctc_layer15=72.118, loss_interctc_layer21=86.408, loss=82.602, backward_time=0.659, grad_norm=74.645, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.129e-04, train_time=4.121 +[gpub006:0/64] 2024-01-25 09:18:12,784 (trainer:753) INFO: 13epoch:train:8201-8300batch: iter_time=9.027e-05, forward_time=0.202, loss_ctc=89.174, loss_interctc_layer6=96.615, loss_interctc_layer12=81.148, loss_interctc_layer15=75.409, loss_interctc_layer21=91.138, loss=86.697, backward_time=0.689, grad_norm=101.103, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=1.129e-04, train_time=4.260 +[gpub006:0/64] 2024-01-25 09:24:14,944 (trainer:753) INFO: 13epoch:train:8301-8400batch: iter_time=9.272e-05, forward_time=0.254, loss_ctc=96.358, loss_interctc_layer6=95.718, loss_interctc_layer12=80.264, loss_interctc_layer15=74.204, loss_interctc_layer21=98.504, loss=89.010, backward_time=0.562, grad_norm=72.235, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.129e-04, train_time=3.620 +[gpub006:0/64] 2024-01-25 09:30:13,421 (trainer:753) INFO: 13epoch:train:8401-8500batch: iter_time=9.041e-05, forward_time=0.221, loss_ctc=90.449, loss_interctc_layer6=101.279, loss_interctc_layer12=85.303, loss_interctc_layer15=79.166, loss_interctc_layer21=92.810, loss=89.801, backward_time=0.594, grad_norm=88.258, clip=100.000, loss_scale=1.156e+31, optim_step_time=0.226, optim0_lr0=1.129e-04, train_time=3.585 +[gpub006:0/64] 2024-01-25 09:37:13,501 (trainer:753) INFO: 13epoch:train:8501-8600batch: iter_time=8.948e-05, forward_time=0.221, loss_ctc=86.720, loss_interctc_layer6=89.836, loss_interctc_layer12=75.007, loss_interctc_layer15=69.065, loss_interctc_layer21=89.048, loss=81.935, backward_time=0.734, grad_norm=65.634, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.231, optim0_lr0=1.128e-04, train_time=4.202 +[gpub006:0/64] 2024-01-25 09:42:53,224 (trainer:753) INFO: 13epoch:train:8601-8700batch: iter_time=8.360e-05, forward_time=0.201, loss_ctc=88.537, loss_interctc_layer6=87.992, loss_interctc_layer12=74.066, loss_interctc_layer15=68.879, loss_interctc_layer21=90.530, loss=82.001, backward_time=0.557, grad_norm=81.483, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, 
optim0_lr0=1.128e-04, train_time=3.397 +[gpub006:0/64] 2024-01-25 09:46:18,882 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub006:0/64] 2024-01-25 09:46:37,566 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 09:46:41,005 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 09:46:41,005 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub006:0/64] 2024-01-25 09:46:41,008 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 10:02:46,022 (trainer:753) INFO: 13epoch:train:8701-8800batch: iter_time=3.359, forward_time=0.233, loss_ctc=97.466, loss_interctc_layer6=96.763, loss_interctc_layer12=81.150, loss_interctc_layer15=75.093, loss_interctc_layer21=99.670, loss=90.028, backward_time=0.596, grad_norm=71.576, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.128e-04, train_time=11.927 +[gpub006:0/64] 2024-01-25 10:08:43,508 (trainer:753) INFO: 13epoch:train:8801-8900batch: iter_time=8.535e-05, forward_time=0.203, loss_ctc=97.537, loss_interctc_layer6=98.045, loss_interctc_layer12=82.416, loss_interctc_layer15=76.293, loss_interctc_layer21=99.702, loss=90.799, backward_time=0.576, grad_norm=121.051, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.127e-04, train_time=3.575 +[gpub006:0/64] 2024-01-25 10:12:47,219 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. 
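[editor's note] Every iter-factory above reports the same mini-batch summary: N-batch=19027, batch_size=256, mean=256.0, min=256, max=257. A max of 257 alongside a min of 256 means the utterances that do not divide evenly into 256 are folded into existing batches rather than emitted as a short final batch. The snippet below is a generic illustration of that folding, not ESPnet's UnsortedBatchSampler implementation:

# Generic illustration (not ESPnet's UnsortedBatchSampler): chunk keys into fixed-size
# batches and spread any remainder over the last batches, giving min=batch_size and
# max=batch_size+1 as in the summaries above. The +5 remainder below is invented.
def fold_into_batches(keys, batch_size=256):
    n_full = len(keys) // batch_size
    batches = [keys[i * batch_size:(i + 1) * batch_size] for i in range(n_full)]
    for j, leftover in enumerate(keys[n_full * batch_size:]):
        batches[-(j + 1)].append(leftover)  # one extra key per batch, from the end
    return batches

batches = fold_into_batches([f"utt{i:07d}" for i in range(19027 * 256 + 5)])
sizes = [len(b) for b in batches]
print(len(batches), min(sizes), max(sizes))  # 19027 256 257, mean ~256.0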
+[gpub006:0/64] 2024-01-25 10:15:12,019 (trainer:753) INFO: 13epoch:train:8901-9000batch: iter_time=8.847e-05, forward_time=0.201, loss_ctc=96.360, loss_interctc_layer6=93.449, loss_interctc_layer12=78.208, loss_interctc_layer15=72.276, loss_interctc_layer21=99.213, loss=87.901, backward_time=0.657, grad_norm=82.046, clip=100.000, loss_scale=1.639e+31, optim_step_time=0.225, optim0_lr0=1.127e-04, train_time=3.885 +[gpub006:0/64] 2024-01-25 10:21:52,884 (trainer:753) INFO: 13epoch:train:9001-9100batch: iter_time=9.185e-05, forward_time=0.205, loss_ctc=96.142, loss_interctc_layer6=107.175, loss_interctc_layer12=90.615, loss_interctc_layer15=84.076, loss_interctc_layer21=98.521, loss=95.306, backward_time=0.658, grad_norm=112.027, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.127e-04, train_time=4.008 +[gpub006:0/64] 2024-01-25 10:27:34,246 (trainer:753) INFO: 13epoch:train:9101-9200batch: iter_time=9.032e-05, forward_time=0.237, loss_ctc=86.690, loss_interctc_layer6=90.702, loss_interctc_layer12=75.759, loss_interctc_layer15=69.830, loss_interctc_layer21=89.176, loss=82.431, backward_time=0.548, grad_norm=74.912, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.126e-04, train_time=3.412 +[gpub006:0/64] 2024-01-25 10:34:27,193 (trainer:753) INFO: 13epoch:train:9201-9300batch: iter_time=9.401e-05, forward_time=0.255, loss_ctc=80.418, loss_interctc_layer6=85.435, loss_interctc_layer12=71.318, loss_interctc_layer15=65.781, loss_interctc_layer21=82.602, loss=77.111, backward_time=0.665, grad_norm=68.302, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=1.126e-04, train_time=4.130 +[gpub006:0/64] 2024-01-25 10:40:48,904 (trainer:753) INFO: 13epoch:train:9301-9400batch: iter_time=8.720e-05, forward_time=0.208, loss_ctc=91.430, loss_interctc_layer6=93.081, loss_interctc_layer12=78.511, loss_interctc_layer15=72.789, loss_interctc_layer21=93.739, loss=85.910, backward_time=0.666, grad_norm=78.279, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.126e-04, train_time=3.817 +[gpub006:0/64] 2024-01-25 10:47:30,457 (trainer:753) INFO: 13epoch:train:9401-9500batch: iter_time=8.748e-05, forward_time=0.202, loss_ctc=90.092, loss_interctc_layer6=98.402, loss_interctc_layer12=83.192, loss_interctc_layer15=77.051, loss_interctc_layer21=92.142, loss=88.176, backward_time=0.649, grad_norm=101.691, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.126e-04, train_time=4.015 +[gpub006:0/64] 2024-01-25 10:53:19,124 (trainer:753) INFO: 13epoch:train:9501-9600batch: iter_time=8.648e-05, forward_time=0.202, loss_ctc=85.497, loss_interctc_layer6=91.896, loss_interctc_layer12=76.636, loss_interctc_layer15=70.811, loss_interctc_layer21=87.467, loss=82.461, backward_time=0.602, grad_norm=66.269, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.125e-04, train_time=3.486 +[gpub006:0/64] 2024-01-25 10:59:21,955 (trainer:753) INFO: 13epoch:train:9601-9700batch: iter_time=8.494e-05, forward_time=0.203, loss_ctc=93.320, loss_interctc_layer6=95.330, loss_interctc_layer12=80.341, loss_interctc_layer15=74.372, loss_interctc_layer21=95.772, loss=87.827, backward_time=0.560, grad_norm=78.185, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.125e-04, train_time=3.628 +[gpub006:0/64] 2024-01-25 11:05:22,459 (trainer:753) INFO: 13epoch:train:9701-9800batch: iter_time=8.970e-05, forward_time=0.226, loss_ctc=103.596, loss_interctc_layer6=104.804, loss_interctc_layer12=87.820, 
loss_interctc_layer15=81.934, loss_interctc_layer21=106.639, loss=96.958, backward_time=0.543, grad_norm=88.653, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.125e-04, train_time=3.605 +[gpub006:0/64] 2024-01-25 11:12:43,232 (trainer:753) INFO: 13epoch:train:9801-9900batch: iter_time=9.114e-05, forward_time=0.214, loss_ctc=77.902, loss_interctc_layer6=84.449, loss_interctc_layer12=70.646, loss_interctc_layer15=65.317, loss_interctc_layer21=79.677, loss=75.598, backward_time=0.704, grad_norm=64.670, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.124e-04, train_time=4.407 +[gpub006:0/64] 2024-01-25 11:18:42,744 (trainer:753) INFO: 13epoch:train:9901-10000batch: iter_time=8.734e-05, forward_time=0.213, loss_ctc=87.638, loss_interctc_layer6=90.461, loss_interctc_layer12=76.059, loss_interctc_layer15=69.981, loss_interctc_layer21=89.646, loss=82.757, backward_time=0.550, grad_norm=72.186, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.124e-04, train_time=3.594 +[gpub006:0/64] 2024-01-25 11:19:02,774 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub006:0/64] 2024-01-25 11:19:21,212 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 11:19:24,627 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 11:19:24,627 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub006:0/64] 2024-01-25 11:19:24,690 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 11:33:27,499 (trainer:753) INFO: 13epoch:train:10001-10100batch: iter_time=2.836, forward_time=0.228, loss_ctc=100.389, loss_interctc_layer6=97.363, loss_interctc_layer12=82.430, loss_interctc_layer15=76.291, loss_interctc_layer21=103.138, loss=91.922, backward_time=0.642, grad_norm=78.235, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=1.124e-04, train_time=8.847 +[gpub006:0/64] 2024-01-25 11:41:35,332 (trainer:753) INFO: 13epoch:train:10101-10200batch: iter_time=8.393e-05, forward_time=0.202, loss_ctc=108.922, loss_interctc_layer6=99.264, loss_interctc_layer12=82.902, loss_interctc_layer15=76.604, loss_interctc_layer21=111.894, loss=95.917, backward_time=0.998, grad_norm=102.189, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.123e-04, train_time=4.878 +[gpub006:0/64] 2024-01-25 11:50:11,899 (trainer:753) INFO: 13epoch:train:10201-10300batch: iter_time=9.686e-05, forward_time=0.202, loss_ctc=98.643, loss_interctc_layer6=95.732, loss_interctc_layer12=80.909, loss_interctc_layer15=75.290, loss_interctc_layer21=101.434, loss=90.402, backward_time=0.988, grad_norm=82.733, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.123e-04, train_time=5.165 +[gpub006:0/64] 2024-01-25 11:56:53,598 (trainer:753) INFO: 13epoch:train:10301-10400batch: iter_time=9.441e-05, forward_time=0.205, loss_ctc=97.605, loss_interctc_layer6=102.747, 
loss_interctc_layer12=85.920, loss_interctc_layer15=79.324, loss_interctc_layer21=100.007, loss=93.121, backward_time=0.662, grad_norm=75.186, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.123e-04, train_time=4.017 +[gpub006:0/64] 2024-01-25 12:03:17,095 (trainer:753) INFO: 13epoch:train:10401-10500batch: iter_time=1.004e-04, forward_time=0.201, loss_ctc=94.366, loss_interctc_layer6=88.969, loss_interctc_layer12=74.436, loss_interctc_layer15=68.814, loss_interctc_layer21=96.841, loss=84.685, backward_time=0.611, grad_norm=80.702, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.123e-04, train_time=3.835 +[gpub006:0/64] 2024-01-25 12:09:09,862 (trainer:753) INFO: 13epoch:train:10501-10600batch: iter_time=9.816e-05, forward_time=0.201, loss_ctc=92.965, loss_interctc_layer6=89.260, loss_interctc_layer12=74.860, loss_interctc_layer15=69.211, loss_interctc_layer21=94.707, loss=84.201, backward_time=0.663, grad_norm=77.558, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.122e-04, train_time=3.527 +[gpub006:0/64] 2024-01-25 12:15:28,274 (trainer:753) INFO: 13epoch:train:10601-10700batch: iter_time=8.854e-05, forward_time=0.230, loss_ctc=88.316, loss_interctc_layer6=92.420, loss_interctc_layer12=77.496, loss_interctc_layer15=71.724, loss_interctc_layer21=90.344, loss=84.060, backward_time=0.637, grad_norm=78.918, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.122e-04, train_time=3.784 +[gpub006:0/64] 2024-01-25 12:22:04,637 (trainer:753) INFO: 13epoch:train:10701-10800batch: iter_time=9.696e-05, forward_time=0.202, loss_ctc=94.602, loss_interctc_layer6=95.934, loss_interctc_layer12=80.534, loss_interctc_layer15=74.659, loss_interctc_layer21=97.079, loss=88.562, backward_time=0.694, grad_norm=79.673, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.122e-04, train_time=3.963 +[gpub006:0/64] 2024-01-25 12:31:03,300 (trainer:753) INFO: 13epoch:train:10801-10900batch: iter_time=9.217e-05, forward_time=0.332, loss_ctc=98.622, loss_interctc_layer6=95.649, loss_interctc_layer12=80.157, loss_interctc_layer15=74.090, loss_interctc_layer21=100.963, loss=89.896, backward_time=0.873, grad_norm=75.298, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.231, optim0_lr0=1.121e-04, train_time=5.385 +[gpub006:0/64] 2024-01-25 12:37:30,650 (trainer:753) INFO: 13epoch:train:10901-11000batch: iter_time=1.007e-04, forward_time=0.201, loss_ctc=95.131, loss_interctc_layer6=100.760, loss_interctc_layer12=85.381, loss_interctc_layer15=79.039, loss_interctc_layer21=97.834, loss=91.629, backward_time=0.581, grad_norm=87.980, clip=100.000, loss_scale=1.399e+31, optim_step_time=0.224, optim0_lr0=1.121e-04, train_time=3.873 +[gpub006:0/64] 2024-01-25 12:43:27,624 (trainer:753) INFO: 13epoch:train:11001-11100batch: iter_time=9.274e-05, forward_time=0.200, loss_ctc=87.804, loss_interctc_layer6=88.496, loss_interctc_layer12=73.478, loss_interctc_layer15=67.675, loss_interctc_layer21=90.111, loss=81.513, backward_time=0.561, grad_norm=80.953, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.121e-04, train_time=3.571 +[gpub006:0/64] 2024-01-25 12:49:17,612 (trainer:753) INFO: 13epoch:train:11101-11200batch: iter_time=8.807e-05, forward_time=0.201, loss_ctc=91.384, loss_interctc_layer6=87.886, loss_interctc_layer12=74.064, loss_interctc_layer15=68.526, loss_interctc_layer21=94.065, loss=83.185, backward_time=0.613, grad_norm=82.448, clip=100.000, loss_scale=2.028e+31, 
optim_step_time=0.224, optim0_lr0=1.121e-04, train_time=3.500 +[gpub006:0/64] 2024-01-25 12:53:28,405 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub006:0/64] 2024-01-25 12:53:47,242 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 12:53:50,740 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 12:53:50,740 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub006:0/64] 2024-01-25 12:53:50,907 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 13:03:13,890 (trainer:753) INFO: 13epoch:train:11201-11300batch: iter_time=3.348, forward_time=0.249, loss_ctc=97.029, loss_interctc_layer6=96.374, loss_interctc_layer12=80.735, loss_interctc_layer15=74.696, loss_interctc_layer21=99.045, loss=89.576, backward_time=0.703, grad_norm=79.253, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.120e-04, train_time=8.362 +[gpub006:0/64] 2024-01-25 13:10:21,045 (trainer:753) INFO: 13epoch:train:11301-11400batch: iter_time=8.840e-05, forward_time=0.203, loss_ctc=95.463, loss_interctc_layer6=97.783, loss_interctc_layer12=81.953, loss_interctc_layer15=75.866, loss_interctc_layer21=97.834, loss=89.780, backward_time=0.694, grad_norm=76.545, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.120e-04, train_time=4.272 +[gpub006:0/64] 2024-01-25 13:18:40,996 (trainer:753) INFO: 13epoch:train:11401-11500batch: iter_time=8.983e-05, forward_time=0.202, loss_ctc=96.951, loss_interctc_layer6=93.383, loss_interctc_layer12=77.986, loss_interctc_layer15=72.054, loss_interctc_layer21=99.701, loss=88.015, backward_time=0.846, grad_norm=81.055, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.120e-04, train_time=4.999 +[gpub006:0/64] 2024-01-25 13:28:25,667 (trainer:753) INFO: 13epoch:train:11501-11600batch: iter_time=9.897e-05, forward_time=0.203, loss_ctc=96.884, loss_interctc_layer6=107.327, loss_interctc_layer12=90.578, loss_interctc_layer15=84.052, loss_interctc_layer21=99.100, loss=95.588, backward_time=1.248, grad_norm=84.505, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.119e-04, train_time=5.846 +[gpub006:0/64] 2024-01-25 13:37:18,539 (trainer:753) INFO: 13epoch:train:11601-11700batch: iter_time=1.008e-04, forward_time=0.201, loss_ctc=87.966, loss_interctc_layer6=90.737, loss_interctc_layer12=75.651, loss_interctc_layer15=69.766, loss_interctc_layer21=90.217, loss=82.868, backward_time=1.104, grad_norm=77.627, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.119e-04, train_time=5.327 +[gpub006:0/64] 2024-01-25 13:45:40,870 (trainer:753) INFO: 13epoch:train:11701-11800batch: iter_time=9.752e-05, forward_time=0.201, loss_ctc=79.743, loss_interctc_layer6=84.822, loss_interctc_layer12=70.748, loss_interctc_layer15=65.316, loss_interctc_layer21=81.871, loss=76.500, backward_time=1.065, grad_norm=77.301, 
clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.119e-04, train_time=5.025 +[gpub006:0/64] 2024-01-25 13:54:00,776 (trainer:753) INFO: 13epoch:train:11801-11900batch: iter_time=9.627e-05, forward_time=0.202, loss_ctc=89.921, loss_interctc_layer6=91.890, loss_interctc_layer12=77.309, loss_interctc_layer15=71.780, loss_interctc_layer21=92.388, loss=84.657, backward_time=1.103, grad_norm=80.143, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.118e-04, train_time=4.999 +[gpub006:0/64] 2024-01-25 14:00:18,718 (trainer:753) INFO: 13epoch:train:11901-12000batch: iter_time=9.456e-05, forward_time=0.202, loss_ctc=90.447, loss_interctc_layer6=98.540, loss_interctc_layer12=83.261, loss_interctc_layer15=77.246, loss_interctc_layer21=92.759, loss=88.451, backward_time=0.690, grad_norm=82.066, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.118e-04, train_time=3.779 +[gpub006:0/64] 2024-01-25 14:07:45,428 (trainer:753) INFO: 13epoch:train:12001-12100batch: iter_time=9.548e-05, forward_time=0.314, loss_ctc=83.846, loss_interctc_layer6=90.524, loss_interctc_layer12=75.564, loss_interctc_layer15=69.757, loss_interctc_layer21=85.791, loss=81.096, backward_time=0.786, grad_norm=92.981, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.231, optim0_lr0=1.118e-04, train_time=4.467 +[gpub006:0/64] 2024-01-25 14:14:48,770 (trainer:753) INFO: 13epoch:train:12101-12200batch: iter_time=9.582e-05, forward_time=0.202, loss_ctc=91.264, loss_interctc_layer6=94.827, loss_interctc_layer12=79.362, loss_interctc_layer15=73.209, loss_interctc_layer21=93.520, loss=86.436, backward_time=0.848, grad_norm=78.619, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.118e-04, train_time=4.233 +[gpub006:0/64] 2024-01-25 14:21:20,346 (trainer:753) INFO: 13epoch:train:12201-12300batch: iter_time=9.769e-05, forward_time=0.202, loss_ctc=103.468, loss_interctc_layer6=104.988, loss_interctc_layer12=88.172, loss_interctc_layer15=81.647, loss_interctc_layer21=105.603, loss=96.775, backward_time=0.633, grad_norm=82.151, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.117e-04, train_time=3.916 +[gpub006:0/64] 2024-01-25 14:27:46,941 (trainer:753) INFO: 13epoch:train:12301-12400batch: iter_time=1.022e-04, forward_time=0.201, loss_ctc=77.662, loss_interctc_layer6=84.738, loss_interctc_layer12=71.141, loss_interctc_layer15=65.542, loss_interctc_layer21=79.599, loss=75.736, backward_time=0.638, grad_norm=93.021, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.117e-04, train_time=3.866 +[gpub006:0/64] 2024-01-25 14:33:32,383 (trainer:753) INFO: 13epoch:train:12401-12500batch: iter_time=9.225e-05, forward_time=0.201, loss_ctc=86.663, loss_interctc_layer6=90.435, loss_interctc_layer12=75.611, loss_interctc_layer15=69.782, loss_interctc_layer21=88.602, loss=82.219, backward_time=0.567, grad_norm=72.597, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.117e-04, train_time=3.454 +[gpub006:0/64] 2024-01-25 14:33:52,413 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub006:0/64] 2024-01-25 14:34:10,961 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 14:34:14,375 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 14:34:14,375 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub006:0/64] 2024-01-25 14:34:14,447 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 14:48:22,446 (trainer:753) INFO: 13epoch:train:12501-12600batch: iter_time=3.209, forward_time=0.250, loss_ctc=100.469, loss_interctc_layer6=98.032, loss_interctc_layer12=82.547, loss_interctc_layer15=76.472, loss_interctc_layer21=102.974, loss=92.099, backward_time=0.475, grad_norm=73.924, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.116e-04, train_time=8.899 +[gpub006:0/64] 2024-01-25 14:54:16,658 (trainer:753) INFO: 13epoch:train:12601-12700batch: iter_time=8.744e-05, forward_time=0.203, loss_ctc=108.167, loss_interctc_layer6=98.590, loss_interctc_layer12=82.101, loss_interctc_layer15=75.680, loss_interctc_layer21=110.504, loss=95.009, backward_time=0.540, grad_norm=71.173, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.116e-04, train_time=3.543 +[gpub006:0/64] 2024-01-25 15:00:49,540 (trainer:753) INFO: 13epoch:train:12701-12800batch: iter_time=8.609e-05, forward_time=0.202, loss_ctc=98.519, loss_interctc_layer6=95.950, loss_interctc_layer12=80.825, loss_interctc_layer15=74.917, loss_interctc_layer21=101.027, loss=90.248, backward_time=0.779, grad_norm=91.212, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.116e-04, train_time=3.929 +[gpub006:0/64] 2024-01-25 15:07:30,824 (trainer:753) INFO: 13epoch:train:12801-12900batch: iter_time=9.441e-05, forward_time=0.201, loss_ctc=97.705, loss_interctc_layer6=103.013, loss_interctc_layer12=86.033, loss_interctc_layer15=79.344, loss_interctc_layer21=100.247, loss=93.268, backward_time=0.709, grad_norm=82.012, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.224, optim0_lr0=1.116e-04, train_time=4.013 +[gpub006:0/64] 2024-01-25 15:14:14,518 (trainer:753) INFO: 13epoch:train:12901-13000batch: iter_time=9.477e-05, forward_time=0.258, loss_ctc=93.134, loss_interctc_layer6=88.230, loss_interctc_layer12=73.540, loss_interctc_layer15=67.730, loss_interctc_layer21=95.729, loss=83.673, backward_time=0.652, grad_norm=79.490, clip=100.000, loss_scale=2.799e+31, optim_step_time=0.226, optim0_lr0=1.115e-04, train_time=4.037 +[gpub006:0/64] 2024-01-25 15:20:02,482 (trainer:753) INFO: 13epoch:train:13001-13100batch: iter_time=8.673e-05, forward_time=0.257, loss_ctc=92.536, loss_interctc_layer6=89.290, loss_interctc_layer12=74.790, loss_interctc_layer15=69.017, loss_interctc_layer21=94.929, loss=84.112, backward_time=0.608, grad_norm=77.267, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.228, optim0_lr0=1.115e-04, train_time=3.479 +[gpub006:0/64] 2024-01-25 15:26:36,140 (trainer:753) INFO: 
13epoch:train:13101-13200batch: iter_time=9.087e-05, forward_time=0.288, loss_ctc=88.178, loss_interctc_layer6=92.062, loss_interctc_layer12=77.306, loss_interctc_layer15=71.583, loss_interctc_layer21=89.999, loss=83.826, backward_time=0.607, grad_norm=97.809, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.115e-04, train_time=3.936 +[gpub006:0/64] 2024-01-25 15:32:54,339 (trainer:753) INFO: 13epoch:train:13201-13300batch: iter_time=9.448e-05, forward_time=0.202, loss_ctc=93.645, loss_interctc_layer6=95.860, loss_interctc_layer12=80.456, loss_interctc_layer15=74.319, loss_interctc_layer21=95.843, loss=88.025, backward_time=0.555, grad_norm=88.399, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.114e-04, train_time=3.781 +[gpub006:0/64] 2024-01-25 15:38:39,005 (trainer:753) INFO: 13epoch:train:13301-13400batch: iter_time=8.866e-05, forward_time=0.202, loss_ctc=97.643, loss_interctc_layer6=95.213, loss_interctc_layer12=79.346, loss_interctc_layer15=73.240, loss_interctc_layer21=100.058, loss=89.100, backward_time=0.565, grad_norm=113.773, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.114e-04, train_time=3.447 +[gpub006:0/64] 2024-01-25 15:44:49,892 (trainer:753) INFO: 13epoch:train:13401-13500batch: iter_time=9.201e-05, forward_time=0.201, loss_ctc=95.802, loss_interctc_layer6=100.592, loss_interctc_layer12=84.800, loss_interctc_layer15=78.459, loss_interctc_layer21=98.031, loss=91.537, backward_time=0.585, grad_norm=90.544, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.114e-04, train_time=3.709 +[gpub006:0/64] 2024-01-25 15:50:38,408 (trainer:753) INFO: 13epoch:train:13501-13600batch: iter_time=9.307e-05, forward_time=0.201, loss_ctc=88.191, loss_interctc_layer6=89.203, loss_interctc_layer12=74.311, loss_interctc_layer15=68.389, loss_interctc_layer21=90.274, loss=82.074, backward_time=0.560, grad_norm=94.484, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.114e-04, train_time=3.485 +[gpub006:0/64] 2024-01-25 15:56:47,760 (trainer:753) INFO: 13epoch:train:13601-13700batch: iter_time=9.229e-05, forward_time=0.200, loss_ctc=91.583, loss_interctc_layer6=87.098, loss_interctc_layer12=73.107, loss_interctc_layer15=67.657, loss_interctc_layer21=93.688, loss=82.627, backward_time=0.571, grad_norm=84.225, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.113e-04, train_time=3.693 +[gpub006:0/64] 2024-01-25 16:00:20,736 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
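[editor's note] The train_time field in each summary reads as average seconds per batch over the 100-batch window, including data loading: it matches the wall-clock spacing of the records themselves. A quick check against two records above (13401-13500 ending at 15:44:49,892 and 13501-13600 ending at 15:50:38,408, logged train_time=3.485):

# Hedged check: train_time looks like mean seconds per batch over each 100-batch window.
# Both timestamps and the 3.485 value are copied from the records above.
from datetime import datetime

t0 = datetime.strptime("2024-01-25 15:44:49,892", "%Y-%m-%d %H:%M:%S,%f")
t1 = datetime.strptime("2024-01-25 15:50:38,408", "%Y-%m-%d %H:%M:%S,%f")
seconds_per_batch = (t1 - t0).total_seconds() / 100
print(f"{seconds_per_batch:.3f}")  # 3.485, matching the logged train_time=3.485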
+[gpub006:0/64] 2024-01-25 16:00:39,653 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 16:00:43,115 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 16:00:43,115 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub006:0/64] 2024-01-25 16:00:43,153 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 16:12:56,773 (trainer:753) INFO: 13epoch:train:13701-13800batch: iter_time=3.653, forward_time=0.244, loss_ctc=96.167, loss_interctc_layer6=95.366, loss_interctc_layer12=79.776, loss_interctc_layer15=73.460, loss_interctc_layer21=98.872, loss=88.728, backward_time=0.549, grad_norm=101.220, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.228, optim0_lr0=1.113e-04, train_time=9.690 +[gpub006:0/64] 2024-01-25 16:18:02,797 (trainer:753) INFO: 13epoch:train:13801-13900batch: iter_time=7.880e-05, forward_time=0.202, loss_ctc=95.954, loss_interctc_layer6=97.889, loss_interctc_layer12=82.402, loss_interctc_layer15=76.227, loss_interctc_layer21=98.425, loss=90.179, backward_time=0.481, grad_norm=102.712, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.113e-04, train_time=3.060 +[gpub006:0/64] 2024-01-25 16:24:36,945 (trainer:753) INFO: 13epoch:train:13901-14000batch: iter_time=8.246e-05, forward_time=0.201, loss_ctc=96.348, loss_interctc_layer6=93.321, loss_interctc_layer12=78.426, loss_interctc_layer15=72.260, loss_interctc_layer21=98.670, loss=87.805, backward_time=0.619, grad_norm=81.350, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.112e-04, train_time=3.941 +[gpub006:0/64] 2024-01-25 16:30:35,865 (trainer:753) INFO: 13epoch:train:14001-14100batch: iter_time=8.920e-05, forward_time=0.202, loss_ctc=96.119, loss_interctc_layer6=107.221, loss_interctc_layer12=90.495, loss_interctc_layer15=84.051, loss_interctc_layer21=98.459, loss=95.269, backward_time=0.555, grad_norm=74.738, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.112e-04, train_time=3.589 +[gpub006:0/64] 2024-01-25 16:36:05,117 (trainer:753) INFO: 13epoch:train:14101-14200batch: iter_time=8.891e-05, forward_time=0.200, loss_ctc=86.676, loss_interctc_layer6=90.057, loss_interctc_layer12=74.971, loss_interctc_layer15=68.773, loss_interctc_layer21=88.875, loss=81.870, backward_time=0.507, grad_norm=78.280, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.112e-04, train_time=3.292 +[gpub006:0/64] 2024-01-25 16:42:13,808 (trainer:753) INFO: 13epoch:train:14201-14300batch: iter_time=8.444e-05, forward_time=0.200, loss_ctc=79.215, loss_interctc_layer6=84.644, loss_interctc_layer12=70.821, loss_interctc_layer15=64.892, loss_interctc_layer21=81.799, loss=76.274, backward_time=0.587, grad_norm=103.610, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.112e-04, train_time=3.687 +[gpub006:0/64] 2024-01-25 16:47:53,830 (trainer:753) INFO: 
13epoch:train:14301-14400batch: iter_time=8.658e-05, forward_time=0.201, loss_ctc=90.607, loss_interctc_layer6=92.497, loss_interctc_layer12=77.475, loss_interctc_layer15=71.763, loss_interctc_layer21=93.062, loss=85.081, backward_time=0.531, grad_norm=87.103, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.111e-04, train_time=3.400 +[gpub006:0/64] 2024-01-25 16:53:56,683 (trainer:753) INFO: 13epoch:train:14401-14500batch: iter_time=8.632e-05, forward_time=0.252, loss_ctc=89.827, loss_interctc_layer6=98.229, loss_interctc_layer12=82.453, loss_interctc_layer15=76.359, loss_interctc_layer21=92.078, loss=87.789, backward_time=0.608, grad_norm=74.944, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.111e-04, train_time=3.625 +[gpub006:0/64] 2024-01-25 16:59:51,776 (trainer:753) INFO: 13epoch:train:14501-14600batch: iter_time=8.577e-05, forward_time=0.259, loss_ctc=83.998, loss_interctc_layer6=91.364, loss_interctc_layer12=76.445, loss_interctc_layer15=70.537, loss_interctc_layer21=86.057, loss=81.680, backward_time=0.571, grad_norm=86.932, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.111e-04, train_time=3.554 +[gpub006:0/64] 2024-01-25 17:05:31,105 (trainer:753) INFO: 13epoch:train:14601-14700batch: iter_time=8.513e-05, forward_time=0.202, loss_ctc=90.988, loss_interctc_layer6=94.780, loss_interctc_layer12=79.464, loss_interctc_layer15=73.414, loss_interctc_layer21=93.341, loss=86.397, backward_time=0.549, grad_norm=72.803, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.110e-04, train_time=3.394 +[gpub006:0/64] 2024-01-25 17:12:31,942 (trainer:753) INFO: 13epoch:train:14701-14800batch: iter_time=8.808e-05, forward_time=0.200, loss_ctc=103.815, loss_interctc_layer6=104.040, loss_interctc_layer12=87.164, loss_interctc_layer15=80.642, loss_interctc_layer21=105.106, loss=96.153, backward_time=0.631, grad_norm=88.297, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.110e-04, train_time=4.208 +[gpub006:0/64] 2024-01-25 17:18:09,280 (trainer:753) INFO: 13epoch:train:14801-14900batch: iter_time=9.045e-05, forward_time=0.199, loss_ctc=77.052, loss_interctc_layer6=83.615, loss_interctc_layer12=69.941, loss_interctc_layer15=64.675, loss_interctc_layer21=78.687, loss=74.794, backward_time=0.547, grad_norm=73.925, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.110e-04, train_time=3.373 +[gpub006:0/64] 2024-01-25 17:24:22,549 (trainer:753) INFO: 13epoch:train:14901-15000batch: iter_time=8.738e-05, forward_time=0.201, loss_ctc=86.853, loss_interctc_layer6=90.390, loss_interctc_layer12=75.726, loss_interctc_layer15=70.283, loss_interctc_layer21=88.420, loss=82.334, backward_time=0.683, grad_norm=77.700, clip=100.000, loss_scale=5.598e+31, optim_step_time=0.224, optim0_lr0=1.110e-04, train_time=3.732 +[gpub006:0/64] 2024-01-25 18:00:18,523 (trainer:352) INFO: 13epoch results: [train] iter_time=0.244, forward_time=0.214, loss_ctc=94.252, loss_interctc_layer6=95.321, loss_interctc_layer12=80.195, loss_interctc_layer15=74.293, loss_interctc_layer21=96.503, loss=88.113, backward_time=0.674, grad_norm=85.133, clip=100.000, loss_scale=1.994e+31, optim_step_time=0.225, optim0_lr0=1.132e-04, train_time=4.528, time=18 hours, 52 minutes and 31.82 seconds, total_count=195000, gpu_max_cached_mem_GB=34.396, [valid] loss_ctc=55.119, cer_ctc=0.249, loss_interctc_layer6=58.615, cer_interctc_layer6=0.258, loss_interctc_layer12=45.802, cer_interctc_layer12=0.192, 
loss_interctc_layer15=41.371, cer_interctc_layer15=0.166, loss_interctc_layer21=57.424, cer_interctc_layer21=0.260, loss=51.666, time=35 minutes and 31.16 seconds, total_count=60723, gpu_max_cached_mem_GB=34.396 +[gpub006:0/64] 2024-01-25 18:00:45,136 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count +[gpub006:0/64] 2024-01-25 18:00:45,198 (trainer:286) INFO: 14/45epoch started. Estimated time to finish: 3 weeks, 4 days and 23 hours +[gpub006:0/64] 2024-01-25 18:00:45,238 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub006:0/64] 2024-01-25 18:01:03,318 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 18:01:06,703 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 18:01:06,703 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub006:0/64] 2024-01-25 18:01:06,722 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 18:13:08,310 (trainer:753) INFO: 14epoch:train:1-100batch: iter_time=2.725, forward_time=0.241, loss_ctc=78.749, loss_interctc_layer6=87.141, loss_interctc_layer12=73.643, loss_interctc_layer15=68.084, loss_interctc_layer21=80.751, loss=77.673, backward_time=0.475, grad_norm=76.034, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.109e-04, train_time=7.429 +[gpub006:0/64] 2024-01-25 18:18:37,025 (trainer:753) INFO: 14epoch:train:101-200batch: iter_time=8.573e-05, forward_time=0.201, loss_ctc=83.043, loss_interctc_layer6=91.615, loss_interctc_layer12=78.380, loss_interctc_layer15=72.716, loss_interctc_layer21=84.748, loss=82.100, backward_time=0.520, grad_norm=93.086, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.109e-04, train_time=3.288 +[gpub006:0/64] 2024-01-25 18:24:06,142 (trainer:753) INFO: 14epoch:train:201-300batch: iter_time=8.914e-05, forward_time=0.202, loss_ctc=90.550, loss_interctc_layer6=89.712, loss_interctc_layer12=75.951, loss_interctc_layer15=70.607, loss_interctc_layer21=92.473, loss=83.859, backward_time=0.528, grad_norm=83.112, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.109e-04, train_time=3.291 +[gpub006:0/64] 2024-01-25 18:29:24,255 (trainer:753) INFO: 14epoch:train:301-400batch: iter_time=9.986e-05, forward_time=0.201, loss_ctc=88.378, loss_interctc_layer6=79.352, loss_interctc_layer12=66.839, loss_interctc_layer15=62.059, loss_interctc_layer21=90.581, loss=77.442, backward_time=0.516, grad_norm=68.520, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.108e-04, train_time=3.181 +[gpub006:0/64] 2024-01-25 18:35:13,131 (trainer:753) INFO: 14epoch:train:401-500batch: iter_time=9.883e-05, forward_time=0.236, loss_ctc=91.857, loss_interctc_layer6=87.147, loss_interctc_layer12=72.929, loss_interctc_layer15=67.235, loss_interctc_layer21=94.362, loss=82.706, backward_time=0.545, grad_norm=71.599, clip=100.000, 
loss_scale=8.113e+31, optim_step_time=0.229, optim0_lr0=1.108e-04, train_time=3.488 +[gpub006:0/64] 2024-01-25 18:40:48,199 (trainer:753) INFO: 14epoch:train:501-600batch: iter_time=1.008e-04, forward_time=0.213, loss_ctc=82.006, loss_interctc_layer6=91.833, loss_interctc_layer12=77.895, loss_interctc_layer15=72.349, loss_interctc_layer21=83.487, loss=81.514, backward_time=0.551, grad_norm=86.220, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.108e-04, train_time=3.351 +[gpub006:0/64] 2024-01-25 18:46:33,757 (trainer:753) INFO: 14epoch:train:601-700batch: iter_time=9.771e-05, forward_time=0.227, loss_ctc=83.781, loss_interctc_layer6=90.242, loss_interctc_layer12=76.910, loss_interctc_layer15=71.696, loss_interctc_layer21=85.456, loss=81.617, backward_time=0.532, grad_norm=85.544, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.108e-04, train_time=3.455 +[gpub006:0/64] 2024-01-25 18:52:39,514 (trainer:753) INFO: 14epoch:train:701-800batch: iter_time=1.134e-04, forward_time=0.207, loss_ctc=103.776, loss_interctc_layer6=105.511, loss_interctc_layer12=89.398, loss_interctc_layer15=83.166, loss_interctc_layer21=105.629, loss=97.496, backward_time=0.574, grad_norm=84.628, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.107e-04, train_time=3.657 +[gpub006:0/64] 2024-01-25 18:57:56,541 (trainer:753) INFO: 14epoch:train:801-900batch: iter_time=1.038e-04, forward_time=0.206, loss_ctc=103.082, loss_interctc_layer6=97.490, loss_interctc_layer12=82.657, loss_interctc_layer15=76.733, loss_interctc_layer21=106.030, loss=93.198, backward_time=0.500, grad_norm=78.663, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.230, optim0_lr0=1.107e-04, train_time=3.166 +[gpub006:0/64] 2024-01-25 19:03:05,562 (trainer:753) INFO: 14epoch:train:901-1000batch: iter_time=1.119e-04, forward_time=0.203, loss_ctc=93.287, loss_interctc_layer6=88.303, loss_interctc_layer12=74.581, loss_interctc_layer15=69.285, loss_interctc_layer21=95.421, loss=84.176, backward_time=0.488, grad_norm=75.556, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.107e-04, train_time=3.094 +[gpub006:0/64] 2024-01-25 19:08:58,474 (trainer:753) INFO: 14epoch:train:1001-1100batch: iter_time=1.041e-04, forward_time=0.245, loss_ctc=81.283, loss_interctc_layer6=87.769, loss_interctc_layer12=74.584, loss_interctc_layer15=69.150, loss_interctc_layer21=82.801, loss=79.117, backward_time=0.566, grad_norm=81.003, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.232, optim0_lr0=1.106e-04, train_time=3.529 +[gpub006:0/64] 2024-01-25 19:15:16,157 (trainer:753) INFO: 14epoch:train:1101-1200batch: iter_time=4.938e-04, forward_time=0.249, loss_ctc=87.070, loss_interctc_layer6=90.356, loss_interctc_layer12=77.328, loss_interctc_layer15=72.760, loss_interctc_layer21=88.857, loss=83.274, backward_time=0.556, grad_norm=76.462, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.106e-04, train_time=3.776 +[gpub006:0/64] 2024-01-25 19:18:11,161 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
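Note on the loss columns in these records: the reported loss appears to be the plain average of loss_ctc and the four intermediate CTC losses (layers 6, 12, 15 and 21). A minimal Python check using the values copied from the 13epoch [train] results above; the snippet is only an illustration, not part of the trainer:

per_layer = [94.252, 95.321, 80.195, 74.293, 96.503]  # loss_ctc, layer6, layer12, layer15, layer21
print(round(sum(per_layer) / len(per_layer), 3))      # 88.113, matching the logged loss=88.113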
+[gpub006:0/64] 2024-01-25 19:18:30,046 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 19:18:33,505 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 19:18:33,505 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub006:0/64] 2024-01-25 19:18:33,510 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 19:26:17,594 (trainer:753) INFO: 14epoch:train:1201-1300batch: iter_time=3.144, forward_time=0.228, loss_ctc=94.464, loss_interctc_layer6=91.213, loss_interctc_layer12=77.091, loss_interctc_layer15=71.448, loss_interctc_layer21=96.236, loss=86.090, backward_time=0.492, grad_norm=70.151, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.106e-04, train_time=6.614 +[gpub006:0/64] 2024-01-25 19:31:13,439 (trainer:753) INFO: 14epoch:train:1301-1400batch: iter_time=8.333e-05, forward_time=0.201, loss_ctc=82.428, loss_interctc_layer6=93.514, loss_interctc_layer12=79.567, loss_interctc_layer15=73.648, loss_interctc_layer21=84.460, loss=82.723, backward_time=0.466, grad_norm=81.824, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.106e-04, train_time=2.959 +[gpub006:0/64] 2024-01-25 19:36:13,821 (trainer:753) INFO: 14epoch:train:1401-1500batch: iter_time=8.359e-05, forward_time=0.200, loss_ctc=81.359, loss_interctc_layer6=84.861, loss_interctc_layer12=71.290, loss_interctc_layer15=65.603, loss_interctc_layer21=83.735, loss=77.369, backward_time=0.463, grad_norm=81.469, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.105e-04, train_time=3.004 +[gpub006:0/64] 2024-01-25 19:41:36,422 (trainer:753) INFO: 14epoch:train:1501-1600batch: iter_time=8.418e-05, forward_time=0.201, loss_ctc=82.380, loss_interctc_layer6=81.585, loss_interctc_layer12=68.876, loss_interctc_layer15=63.848, loss_interctc_layer21=83.956, loss=76.129, backward_time=0.496, grad_norm=66.977, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.105e-04, train_time=3.226 +[gpub006:0/64] 2024-01-25 19:47:56,395 (trainer:753) INFO: 14epoch:train:1601-1700batch: iter_time=8.664e-05, forward_time=0.202, loss_ctc=106.598, loss_interctc_layer6=94.688, loss_interctc_layer12=79.450, loss_interctc_layer15=73.298, loss_interctc_layer21=109.441, loss=92.695, backward_time=0.587, grad_norm=111.761, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.231, optim0_lr0=1.105e-04, train_time=3.800 +[gpub006:0/64] 2024-01-25 19:54:22,987 (trainer:753) INFO: 14epoch:train:1701-1800batch: iter_time=8.481e-05, forward_time=0.300, loss_ctc=80.128, loss_interctc_layer6=81.643, loss_interctc_layer12=68.839, loss_interctc_layer15=63.685, loss_interctc_layer21=82.135, loss=75.286, backward_time=0.624, grad_norm=74.630, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.234, optim0_lr0=1.104e-04, train_time=3.865 +[gpub006:0/64] 2024-01-25 20:01:06,003 (trainer:753) INFO: 
14epoch:train:1801-1900batch: iter_time=8.585e-05, forward_time=0.265, loss_ctc=80.199, loss_interctc_layer6=92.108, loss_interctc_layer12=78.478, loss_interctc_layer15=73.391, loss_interctc_layer21=81.778, loss=81.191, backward_time=0.665, grad_norm=81.736, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.231, optim0_lr0=1.104e-04, train_time=4.029 +[gpub006:0/64] 2024-01-25 20:07:38,381 (trainer:753) INFO: 14epoch:train:1901-2000batch: iter_time=8.374e-05, forward_time=0.203, loss_ctc=91.962, loss_interctc_layer6=98.272, loss_interctc_layer12=82.954, loss_interctc_layer15=76.443, loss_interctc_layer21=94.480, loss=88.822, backward_time=0.611, grad_norm=86.889, clip=100.000, loss_scale=1.120e+32, optim_step_time=0.228, optim0_lr0=1.104e-04, train_time=3.923 +[gpub006:0/64] 2024-01-25 20:13:41,184 (trainer:753) INFO: 14epoch:train:2001-2100batch: iter_time=8.523e-05, forward_time=0.238, loss_ctc=93.372, loss_interctc_layer6=94.686, loss_interctc_layer12=79.847, loss_interctc_layer15=73.998, loss_interctc_layer21=95.859, loss=87.552, backward_time=0.628, grad_norm=73.763, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.230, optim0_lr0=1.104e-04, train_time=3.628 +[gpub006:0/64] 2024-01-25 20:18:55,002 (trainer:753) INFO: 14epoch:train:2101-2200batch: iter_time=8.349e-05, forward_time=0.201, loss_ctc=95.644, loss_interctc_layer6=88.619, loss_interctc_layer12=74.451, loss_interctc_layer15=68.753, loss_interctc_layer21=98.268, loss=85.147, backward_time=0.484, grad_norm=76.213, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.103e-04, train_time=3.140 +[gpub006:0/64] 2024-01-25 20:25:03,766 (trainer:753) INFO: 14epoch:train:2201-2300batch: iter_time=8.220e-05, forward_time=0.206, loss_ctc=84.920, loss_interctc_layer6=88.061, loss_interctc_layer12=74.556, loss_interctc_layer15=69.048, loss_interctc_layer21=86.823, loss=80.682, backward_time=0.636, grad_norm=73.544, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.103e-04, train_time=3.687 +[gpub006:0/64] 2024-01-25 20:31:50,039 (trainer:753) INFO: 14epoch:train:2301-2400batch: iter_time=8.459e-05, forward_time=0.200, loss_ctc=78.565, loss_interctc_layer6=86.654, loss_interctc_layer12=73.066, loss_interctc_layer15=67.929, loss_interctc_layer21=80.175, loss=77.278, backward_time=0.642, grad_norm=65.113, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.103e-04, train_time=4.063 +[gpub006:0/64] 2024-01-25 20:37:02,700 (trainer:753) INFO: 14epoch:train:2401-2500batch: iter_time=8.245e-05, forward_time=0.201, loss_ctc=105.260, loss_interctc_layer6=97.864, loss_interctc_layer12=82.856, loss_interctc_layer15=77.139, loss_interctc_layer21=107.543, loss=94.133, backward_time=0.482, grad_norm=86.462, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.103e-04, train_time=3.126 +[gpub006:0/64] 2024-01-25 20:37:22,724 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
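The [train] dataset blocks above show how each shard is wired up: speech from a kaldi_ark split plus the text, text_prev and text_ctc streams, batched by an UnsortedBatchSampler driven by the speech_shape key file (N-batch=19027, batch_size=256, min=256, max=257). A rough sketch of that kind of fixed-size batching, assuming the short tail of keys is folded back into earlier batches (which would explain min=256 / max=257); this is illustrative only, not ESPnet's actual sampler code:

def make_fixed_size_batches(keys, batch_size=256):
    # Group utterance keys (read from the shape file, in stored order) into batches.
    batches = [keys[i:i + batch_size] for i in range(0, len(keys), batch_size)]
    # Fold a short final batch back into the others, so sizes become 256 or 257.
    if len(batches) > 1 and len(batches[-1]) < batch_size:
        tail = batches.pop()
        for j, key in enumerate(tail):
            batches[j % len(batches)].append(key)
    return batches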
+[gpub006:0/64] 2024-01-25 20:37:41,774 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 20:37:45,303 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 20:37:45,303 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub006:0/64] 2024-01-25 20:37:45,362 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 20:55:50,075 (trainer:753) INFO: 14epoch:train:2501-2600batch: iter_time=4.363, forward_time=0.257, loss_ctc=77.073, loss_interctc_layer6=86.153, loss_interctc_layer12=72.133, loss_interctc_layer15=66.618, loss_interctc_layer21=78.949, loss=76.185, backward_time=0.480, grad_norm=75.519, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.230, optim0_lr0=1.102e-04, train_time=11.274 +[gpub006:0/64] 2024-01-25 21:01:21,089 (trainer:753) INFO: 14epoch:train:2601-2700batch: iter_time=9.915e-05, forward_time=0.203, loss_ctc=79.358, loss_interctc_layer6=89.412, loss_interctc_layer12=75.731, loss_interctc_layer15=70.105, loss_interctc_layer21=81.485, loss=79.218, backward_time=0.503, grad_norm=87.854, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.228, optim0_lr0=1.102e-04, train_time=3.310 +[gpub006:0/64] 2024-01-25 21:06:39,086 (trainer:753) INFO: 14epoch:train:2701-2800batch: iter_time=9.764e-05, forward_time=0.231, loss_ctc=87.768, loss_interctc_layer6=87.723, loss_interctc_layer12=73.942, loss_interctc_layer15=68.507, loss_interctc_layer21=89.899, loss=81.568, backward_time=0.532, grad_norm=86.006, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.230, optim0_lr0=1.102e-04, train_time=3.179 +[gpub006:0/64] 2024-01-25 21:12:02,938 (trainer:753) INFO: 14epoch:train:2801-2900batch: iter_time=1.078e-04, forward_time=0.202, loss_ctc=87.137, loss_interctc_layer6=78.485, loss_interctc_layer12=66.122, loss_interctc_layer15=61.051, loss_interctc_layer21=89.262, loss=76.411, backward_time=0.485, grad_norm=69.165, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.101e-04, train_time=3.239 +[gpub006:0/64] 2024-01-25 21:13:10,648 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. 
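The WARNING above, together with the loss_scale column, matches the usual fp16 dynamic-loss-scaling behaviour: when the unscaled gradients come out non-finite, the optimizer step is skipped and the scale is cut back. A minimal sketch of that pattern with torch.cuda.amp.GradScaler, written here only as an illustration under the assumption that this run uses AMP (the clip threshold and the model/batch interface are placeholders, not the trainer's real code):

import torch

scaler = torch.cuda.amp.GradScaler()

def train_step(model, optimizer, batch, max_norm=100.0):  # threshold illustrative
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = model(**batch)["loss"]
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)  # bring gradients back to their true scale
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    if not torch.isfinite(grad_norm):
        print("The grad norm is nan. Skipping updating the model.")
    scaler.step(optimizer)   # internally skips the update if inf/nan grads were seen
    scaler.update()          # and then reduces the loss scale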
+[gpub006:0/64] 2024-01-25 21:17:18,931 (trainer:753) INFO: 14epoch:train:2901-3000batch: iter_time=1.081e-04, forward_time=0.202, loss_ctc=91.332, loss_interctc_layer6=86.425, loss_interctc_layer12=72.223, loss_interctc_layer15=66.444, loss_interctc_layer21=93.962, loss=82.077, backward_time=0.532, grad_norm=62.741, clip=100.000, loss_scale=9.834e+31, optim_step_time=0.228, optim0_lr0=1.101e-04, train_time=3.159 +[gpub006:0/64] 2024-01-25 21:23:45,740 (trainer:753) INFO: 14epoch:train:3001-3100batch: iter_time=1.009e-04, forward_time=0.240, loss_ctc=81.535, loss_interctc_layer6=91.753, loss_interctc_layer12=77.597, loss_interctc_layer15=71.914, loss_interctc_layer21=82.972, loss=81.154, backward_time=0.703, grad_norm=73.589, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.232, optim0_lr0=1.101e-04, train_time=3.867 +[gpub006:0/64] 2024-01-25 21:29:28,318 (trainer:753) INFO: 14epoch:train:3101-3200batch: iter_time=1.047e-04, forward_time=0.204, loss_ctc=81.022, loss_interctc_layer6=87.554, loss_interctc_layer12=74.310, loss_interctc_layer15=69.315, loss_interctc_layer21=83.459, loss=79.132, backward_time=0.517, grad_norm=73.356, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.101e-04, train_time=3.426 +[gpub006:0/64] 2024-01-25 21:35:17,768 (trainer:753) INFO: 14epoch:train:3201-3300batch: iter_time=1.003e-04, forward_time=0.202, loss_ctc=101.518, loss_interctc_layer6=104.530, loss_interctc_layer12=88.056, loss_interctc_layer15=81.393, loss_interctc_layer21=104.076, loss=95.914, backward_time=0.574, grad_norm=84.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.100e-04, train_time=3.495 +[gpub006:0/64] 2024-01-25 21:40:51,851 (trainer:753) INFO: 14epoch:train:3301-3400batch: iter_time=9.751e-05, forward_time=0.202, loss_ctc=101.248, loss_interctc_layer6=96.637, loss_interctc_layer12=81.553, loss_interctc_layer15=75.554, loss_interctc_layer21=104.086, loss=91.816, backward_time=0.503, grad_norm=87.209, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.100e-04, train_time=3.341 +[gpub006:0/64] 2024-01-25 21:47:23,066 (trainer:753) INFO: 14epoch:train:3401-3500batch: iter_time=1.029e-04, forward_time=0.280, loss_ctc=90.653, loss_interctc_layer6=86.523, loss_interctc_layer12=72.564, loss_interctc_layer15=66.992, loss_interctc_layer21=92.901, loss=81.927, backward_time=0.655, grad_norm=75.129, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.232, optim0_lr0=1.100e-04, train_time=3.912 +[gpub006:0/64] 2024-01-25 21:53:12,486 (trainer:753) INFO: 14epoch:train:3501-3600batch: iter_time=1.110e-04, forward_time=0.201, loss_ctc=78.027, loss_interctc_layer6=85.469, loss_interctc_layer12=72.026, loss_interctc_layer15=66.681, loss_interctc_layer21=79.972, loss=76.435, backward_time=0.593, grad_norm=73.460, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.099e-04, train_time=3.494 +[gpub006:0/64] 2024-01-25 21:58:55,375 (trainer:753) INFO: 14epoch:train:3601-3700batch: iter_time=1.038e-04, forward_time=0.202, loss_ctc=84.481, loss_interctc_layer6=88.264, loss_interctc_layer12=74.975, loss_interctc_layer15=69.998, loss_interctc_layer21=86.566, loss=80.857, backward_time=0.514, grad_norm=70.143, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.099e-04, train_time=3.429 +[gpub006:0/64] 2024-01-25 22:01:54,648 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub006:0/64] 2024-01-25 22:02:14,124 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 22:02:17,591 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 22:02:17,591 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub006:0/64] 2024-01-25 22:02:17,685 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 22:11:41,575 (trainer:753) INFO: 14epoch:train:3701-3800batch: iter_time=4.342, forward_time=0.285, loss_ctc=91.879, loss_interctc_layer6=90.936, loss_interctc_layer12=76.685, loss_interctc_layer15=71.422, loss_interctc_layer21=94.206, loss=85.026, backward_time=0.486, grad_norm=78.933, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.230, optim0_lr0=1.099e-04, train_time=7.661 +[gpub006:0/64] 2024-01-25 22:16:52,668 (trainer:753) INFO: 14epoch:train:3801-3900batch: iter_time=8.864e-05, forward_time=0.202, loss_ctc=77.381, loss_interctc_layer6=92.304, loss_interctc_layer12=77.902, loss_interctc_layer15=72.344, loss_interctc_layer21=78.925, loss=79.771, backward_time=0.527, grad_norm=78.248, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.099e-04, train_time=3.111 +[gpub006:0/64] 2024-01-25 22:22:20,362 (trainer:753) INFO: 14epoch:train:3901-4000batch: iter_time=8.447e-05, forward_time=0.201, loss_ctc=76.025, loss_interctc_layer6=83.936, loss_interctc_layer12=70.294, loss_interctc_layer15=64.717, loss_interctc_layer21=78.368, loss=74.668, backward_time=0.512, grad_norm=77.594, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.098e-04, train_time=3.277 +[gpub006:0/64] 2024-01-25 22:28:23,489 (trainer:753) INFO: 14epoch:train:4001-4100batch: iter_time=9.922e-05, forward_time=0.201, loss_ctc=77.435, loss_interctc_layer6=81.531, loss_interctc_layer12=68.753, loss_interctc_layer15=63.816, loss_interctc_layer21=79.154, loss=74.138, backward_time=0.674, grad_norm=70.863, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.098e-04, train_time=3.631 +[gpub006:0/64] 2024-01-25 22:34:00,538 (trainer:753) INFO: 14epoch:train:4101-4200batch: iter_time=9.289e-05, forward_time=0.201, loss_ctc=101.904, loss_interctc_layer6=94.616, loss_interctc_layer12=79.285, loss_interctc_layer15=73.278, loss_interctc_layer21=104.735, loss=90.763, backward_time=0.544, grad_norm=62.733, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.098e-04, train_time=3.370 +[gpub006:0/64] 2024-01-25 22:39:36,086 (trainer:753) INFO: 14epoch:train:4201-4300batch: iter_time=9.752e-05, forward_time=0.269, loss_ctc=75.131, loss_interctc_layer6=81.539, loss_interctc_layer12=68.607, loss_interctc_layer15=63.421, loss_interctc_layer21=76.826, loss=73.105, backward_time=0.560, grad_norm=68.822, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.231, optim0_lr0=1.098e-04, train_time=3.355 +[gpub006:0/64] 2024-01-25 22:45:01,812 (trainer:753) INFO: 14epoch:train:4301-4400batch: 
iter_time=9.910e-05, forward_time=0.201, loss_ctc=78.182, loss_interctc_layer6=91.072, loss_interctc_layer12=77.653, loss_interctc_layer15=72.630, loss_interctc_layer21=79.691, loss=79.846, backward_time=0.529, grad_norm=100.113, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.226, optim0_lr0=1.097e-04, train_time=3.257 +[gpub006:0/64] 2024-01-25 22:50:41,187 (trainer:753) INFO: 14epoch:train:4401-4500batch: iter_time=8.844e-05, forward_time=0.201, loss_ctc=88.976, loss_interctc_layer6=98.265, loss_interctc_layer12=82.780, loss_interctc_layer15=76.518, loss_interctc_layer21=90.505, loss=87.409, backward_time=0.530, grad_norm=77.277, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.226, optim0_lr0=1.097e-04, train_time=3.394 +[gpub006:0/64] 2024-01-25 22:56:34,630 (trainer:753) INFO: 14epoch:train:4501-4600batch: iter_time=9.446e-05, forward_time=0.245, loss_ctc=92.715, loss_interctc_layer6=94.668, loss_interctc_layer12=79.587, loss_interctc_layer15=73.392, loss_interctc_layer21=95.181, loss=87.109, backward_time=0.519, grad_norm=74.504, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.097e-04, train_time=3.534 +[gpub006:0/64] 2024-01-25 23:01:40,876 (trainer:753) INFO: 14epoch:train:4601-4700batch: iter_time=8.705e-05, forward_time=0.262, loss_ctc=92.068, loss_interctc_layer6=88.601, loss_interctc_layer12=73.956, loss_interctc_layer15=68.128, loss_interctc_layer21=94.539, loss=83.458, backward_time=0.487, grad_norm=79.311, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.233, optim0_lr0=1.096e-04, train_time=3.058 +[gpub006:0/64] 2024-01-25 23:07:12,296 (trainer:753) INFO: 14epoch:train:4701-4800batch: iter_time=9.525e-05, forward_time=0.201, loss_ctc=80.145, loss_interctc_layer6=86.991, loss_interctc_layer12=73.357, loss_interctc_layer15=67.871, loss_interctc_layer21=82.090, loss=78.091, backward_time=0.505, grad_norm=75.302, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.096e-04, train_time=3.318 +[gpub006:0/64] 2024-01-25 23:12:12,424 (trainer:753) INFO: 14epoch:train:4801-4900batch: iter_time=9.772e-05, forward_time=0.201, loss_ctc=75.968, loss_interctc_layer6=85.662, loss_interctc_layer12=71.983, loss_interctc_layer15=66.675, loss_interctc_layer21=77.831, loss=75.624, backward_time=0.479, grad_norm=65.438, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.096e-04, train_time=3.001 +[gpub006:0/64] 2024-01-25 23:17:51,207 (trainer:753) INFO: 14epoch:train:4901-5000batch: iter_time=1.017e-04, forward_time=0.201, loss_ctc=97.066, loss_interctc_layer6=96.902, loss_interctc_layer12=82.052, loss_interctc_layer15=76.401, loss_interctc_layer21=99.326, loss=90.349, backward_time=0.583, grad_norm=91.034, clip=100.000, loss_scale=1.444e+32, optim_step_time=0.227, optim0_lr0=1.096e-04, train_time=3.388 +[gpub006:0/64] 2024-01-25 23:18:11,236 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub006:0/64] 2024-01-25 23:18:30,336 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-25 23:18:33,797 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-25 23:18:33,797 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub006:0/64] 2024-01-25 23:18:33,911 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-25 23:29:58,749 (trainer:753) INFO: 14epoch:train:5001-5100batch: iter_time=4.046, forward_time=0.235, loss_ctc=71.834, loss_interctc_layer6=85.754, loss_interctc_layer12=71.904, loss_interctc_layer15=66.478, loss_interctc_layer21=73.636, loss=73.921, backward_time=0.512, grad_norm=71.768, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.228, optim0_lr0=1.095e-04, train_time=7.274 +[gpub006:0/64] 2024-01-25 23:33:46,157 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub006:0/64] 2024-01-25 23:35:06,654 (trainer:753) INFO: 14epoch:train:5101-5200batch: iter_time=9.414e-05, forward_time=0.201, loss_ctc=76.303, loss_interctc_layer6=89.709, loss_interctc_layer12=75.732, loss_interctc_layer15=70.288, loss_interctc_layer21=78.103, loss=78.027, backward_time=0.510, grad_norm=97.017, clip=100.000, loss_scale=1.410e+32, optim_step_time=0.226, optim0_lr0=1.095e-04, train_time=3.080 +[gpub006:0/64] 2024-01-25 23:41:01,047 (trainer:753) INFO: 14epoch:train:5201-5300batch: iter_time=9.011e-05, forward_time=0.201, loss_ctc=81.943, loss_interctc_layer6=87.723, loss_interctc_layer12=73.861, loss_interctc_layer15=68.289, loss_interctc_layer21=83.930, loss=79.149, backward_time=0.575, grad_norm=71.613, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.095e-04, train_time=3.544 +[gpub006:0/64] 2024-01-25 23:45:53,423 (trainer:753) INFO: 14epoch:train:5301-5400batch: iter_time=1.088e-04, forward_time=0.201, loss_ctc=82.822, loss_interctc_layer6=78.264, loss_interctc_layer12=65.809, loss_interctc_layer15=61.021, loss_interctc_layer21=85.070, loss=74.597, backward_time=0.467, grad_norm=64.342, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.095e-04, train_time=2.924 +[gpub006:0/64] 2024-01-25 23:51:30,367 (trainer:753) INFO: 14epoch:train:5401-5500batch: iter_time=9.366e-05, forward_time=0.201, loss_ctc=86.080, loss_interctc_layer6=85.308, loss_interctc_layer12=71.201, loss_interctc_layer15=65.291, loss_interctc_layer21=88.211, loss=79.218, backward_time=0.541, grad_norm=61.841, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.226, optim0_lr0=1.094e-04, train_time=3.369 +[gpub006:0/64] 2024-01-25 23:56:48,829 (trainer:753) INFO: 14epoch:train:5501-5600batch: iter_time=9.927e-05, forward_time=0.202, loss_ctc=75.214, loss_interctc_layer6=90.338, loss_interctc_layer12=76.377, loss_interctc_layer15=70.835, loss_interctc_layer21=76.732, loss=77.899, backward_time=0.476, grad_norm=66.413, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.226, 
optim0_lr0=1.094e-04, train_time=3.184 +[gpub006:0/64] 2024-01-26 00:02:19,129 (trainer:753) INFO: 14epoch:train:5601-5700batch: iter_time=9.349e-05, forward_time=0.240, loss_ctc=76.873, loss_interctc_layer6=86.851, loss_interctc_layer12=73.296, loss_interctc_layer15=68.623, loss_interctc_layer21=78.632, loss=76.855, backward_time=0.562, grad_norm=69.694, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.232, optim0_lr0=1.094e-04, train_time=3.302 +[gpub006:0/64] 2024-01-26 00:08:08,083 (trainer:753) INFO: 14epoch:train:5701-5800batch: iter_time=8.706e-05, forward_time=0.265, loss_ctc=98.200, loss_interctc_layer6=103.626, loss_interctc_layer12=87.077, loss_interctc_layer15=80.530, loss_interctc_layer21=100.327, loss=93.952, backward_time=0.605, grad_norm=93.334, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.236, optim0_lr0=1.093e-04, train_time=3.488 +[gpub006:0/64] 2024-01-26 00:13:12,781 (trainer:753) INFO: 14epoch:train:5801-5900batch: iter_time=9.542e-05, forward_time=0.201, loss_ctc=99.859, loss_interctc_layer6=96.316, loss_interctc_layer12=81.428, loss_interctc_layer15=75.278, loss_interctc_layer21=102.324, loss=91.041, backward_time=0.496, grad_norm=83.325, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.093e-04, train_time=3.048 +[gpub006:0/64] 2024-01-26 00:18:47,586 (trainer:753) INFO: 14epoch:train:5901-6000batch: iter_time=8.649e-05, forward_time=0.200, loss_ctc=84.900, loss_interctc_layer6=86.287, loss_interctc_layer12=72.265, loss_interctc_layer15=66.734, loss_interctc_layer21=87.127, loss=79.463, backward_time=0.554, grad_norm=67.251, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.093e-04, train_time=3.348 +[gpub006:0/64] 2024-01-26 00:25:29,600 (trainer:753) INFO: 14epoch:train:6001-6100batch: iter_time=9.769e-05, forward_time=0.201, loss_ctc=74.376, loss_interctc_layer6=84.467, loss_interctc_layer12=70.793, loss_interctc_layer15=65.510, loss_interctc_layer21=76.078, loss=74.245, backward_time=0.626, grad_norm=57.972, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.093e-04, train_time=4.020 +[gpub006:0/64] 2024-01-26 00:31:10,815 (trainer:753) INFO: 14epoch:train:6101-6200batch: iter_time=9.540e-05, forward_time=0.201, loss_ctc=80.337, loss_interctc_layer6=88.027, loss_interctc_layer12=74.638, loss_interctc_layer15=69.530, loss_interctc_layer21=82.103, loss=78.927, backward_time=0.589, grad_norm=78.005, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.092e-04, train_time=3.412 +[gpub006:0/64] 2024-01-26 00:34:11,985 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub006:0/64] 2024-01-26 00:34:30,770 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-26 00:34:34,452 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-26 00:34:34,452 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub006:0/64] 2024-01-26 00:34:34,455 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-26 00:46:04,280 (trainer:753) INFO: 14epoch:train:6201-6300batch: iter_time=4.922, forward_time=0.203, loss_ctc=84.823, loss_interctc_layer6=90.373, loss_interctc_layer12=76.501, loss_interctc_layer15=70.596, loss_interctc_layer21=86.774, loss=81.813, backward_time=0.550, grad_norm=79.143, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.092e-04, train_time=8.934 +[gpub006:0/64] 2024-01-26 00:51:13,599 (trainer:753) INFO: 14epoch:train:6301-6400batch: iter_time=9.114e-05, forward_time=0.202, loss_ctc=77.862, loss_interctc_layer6=92.700, loss_interctc_layer12=78.568, loss_interctc_layer15=72.313, loss_interctc_layer21=79.534, loss=80.196, backward_time=0.514, grad_norm=85.366, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.092e-04, train_time=3.093 +[gpub006:0/64] 2024-01-26 00:56:51,337 (trainer:753) INFO: 14epoch:train:6401-6500batch: iter_time=9.256e-05, forward_time=0.201, loss_ctc=75.612, loss_interctc_layer6=82.830, loss_interctc_layer12=69.222, loss_interctc_layer15=63.946, loss_interctc_layer21=77.615, loss=73.845, backward_time=0.579, grad_norm=70.041, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.092e-04, train_time=3.377 +[gpub006:0/64] 2024-01-26 01:01:58,389 (trainer:753) INFO: 14epoch:train:6501-6600batch: iter_time=1.013e-04, forward_time=0.202, loss_ctc=77.341, loss_interctc_layer6=81.364, loss_interctc_layer12=68.275, loss_interctc_layer15=63.127, loss_interctc_layer21=79.213, loss=73.864, backward_time=0.534, grad_norm=66.136, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.091e-04, train_time=3.070 +[gpub006:0/64] 2024-01-26 01:07:17,264 (trainer:753) INFO: 14epoch:train:6601-6700batch: iter_time=1.061e-04, forward_time=0.202, loss_ctc=101.205, loss_interctc_layer6=94.378, loss_interctc_layer12=78.911, loss_interctc_layer15=72.880, loss_interctc_layer21=104.084, loss=90.292, backward_time=0.504, grad_norm=96.494, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.091e-04, train_time=3.189 +[gpub006:0/64] 2024-01-26 01:12:17,686 (trainer:753) INFO: 14epoch:train:6701-6800batch: iter_time=1.048e-04, forward_time=0.217, loss_ctc=74.617, loss_interctc_layer6=81.382, loss_interctc_layer12=68.249, loss_interctc_layer15=63.135, loss_interctc_layer21=76.247, loss=72.726, backward_time=0.474, grad_norm=70.336, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.091e-04, train_time=3.003 +[gpub006:0/64] 2024-01-26 01:17:42,443 (trainer:753) INFO: 14epoch:train:6801-6900batch: 
iter_time=1.025e-04, forward_time=0.308, loss_ctc=77.410, loss_interctc_layer6=91.039, loss_interctc_layer12=76.871, loss_interctc_layer15=71.720, loss_interctc_layer21=79.186, loss=79.245, backward_time=0.551, grad_norm=98.506, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.235, optim0_lr0=1.090e-04, train_time=3.248 +[gpub006:0/64] 2024-01-26 01:24:11,234 (trainer:753) INFO: 14epoch:train:6901-7000batch: iter_time=1.001e-04, forward_time=0.237, loss_ctc=87.824, loss_interctc_layer6=97.120, loss_interctc_layer12=81.836, loss_interctc_layer15=75.646, loss_interctc_layer21=89.876, loss=86.460, backward_time=0.636, grad_norm=102.048, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.090e-04, train_time=3.886 +[gpub006:0/64] 2024-01-26 01:30:05,760 (trainer:753) INFO: 14epoch:train:7001-7100batch: iter_time=9.369e-05, forward_time=0.201, loss_ctc=90.685, loss_interctc_layer6=94.546, loss_interctc_layer12=79.344, loss_interctc_layer15=73.186, loss_interctc_layer21=93.238, loss=86.200, backward_time=0.572, grad_norm=66.284, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.090e-04, train_time=3.546 +[gpub006:0/64] 2024-01-26 01:35:34,615 (trainer:753) INFO: 14epoch:train:7101-7200batch: iter_time=9.962e-05, forward_time=0.201, loss_ctc=90.879, loss_interctc_layer6=88.297, loss_interctc_layer12=73.702, loss_interctc_layer15=67.842, loss_interctc_layer21=93.451, loss=82.834, backward_time=0.533, grad_norm=75.538, clip=100.000, loss_scale=1.022e+32, optim_step_time=0.227, optim0_lr0=1.090e-04, train_time=3.288 +[gpub006:0/64] 2024-01-26 01:41:29,250 (trainer:753) INFO: 14epoch:train:7201-7300batch: iter_time=9.023e-05, forward_time=0.201, loss_ctc=79.858, loss_interctc_layer6=86.180, loss_interctc_layer12=72.292, loss_interctc_layer15=66.819, loss_interctc_layer21=82.170, loss=77.464, backward_time=0.630, grad_norm=73.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.089e-04, train_time=3.546 +[gpub006:0/64] 2024-01-26 01:47:37,380 (trainer:753) INFO: 14epoch:train:7301-7400batch: iter_time=9.166e-05, forward_time=0.200, loss_ctc=76.031, loss_interctc_layer6=85.293, loss_interctc_layer12=71.835, loss_interctc_layer15=66.567, loss_interctc_layer21=77.819, loss=75.509, backward_time=0.597, grad_norm=67.867, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.089e-04, train_time=3.681 +[gpub006:0/64] 2024-01-26 01:53:26,449 (trainer:753) INFO: 14epoch:train:7401-7500batch: iter_time=8.844e-05, forward_time=0.204, loss_ctc=96.245, loss_interctc_layer6=97.733, loss_interctc_layer12=83.267, loss_interctc_layer15=78.091, loss_interctc_layer21=98.758, loss=90.819, backward_time=0.603, grad_norm=84.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.089e-04, train_time=3.490 +[gpub006:0/64] 2024-01-26 01:53:46,479 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub006:0/64] 2024-01-26 01:54:05,141 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-26 01:54:08,629 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-26 01:54:08,629 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub006:0/64] 2024-01-26 01:54:08,659 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-26 02:08:42,385 (trainer:753) INFO: 14epoch:train:7501-7600batch: iter_time=3.262, forward_time=0.246, loss_ctc=76.884, loss_interctc_layer6=85.271, loss_interctc_layer12=71.338, loss_interctc_layer15=65.808, loss_interctc_layer21=78.948, loss=75.650, backward_time=0.473, grad_norm=69.197, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.229, optim0_lr0=1.089e-04, train_time=9.159 +[gpub006:0/64] 2024-01-26 02:13:45,955 (trainer:753) INFO: 14epoch:train:7601-7700batch: iter_time=8.598e-05, forward_time=0.201, loss_ctc=80.989, loss_interctc_layer6=89.710, loss_interctc_layer12=75.894, loss_interctc_layer15=70.206, loss_interctc_layer21=82.830, loss=79.926, backward_time=0.483, grad_norm=88.742, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.088e-04, train_time=3.036 +[gpub006:0/64] 2024-01-26 02:19:32,511 (trainer:753) INFO: 14epoch:train:7701-7800batch: iter_time=8.982e-05, forward_time=0.201, loss_ctc=86.816, loss_interctc_layer6=86.880, loss_interctc_layer12=72.864, loss_interctc_layer15=67.201, loss_interctc_layer21=89.059, loss=80.564, backward_time=0.619, grad_norm=81.461, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.226, optim0_lr0=1.088e-04, train_time=3.465 +[gpub006:0/64] 2024-01-26 02:25:23,373 (trainer:753) INFO: 14epoch:train:7801-7900batch: iter_time=9.237e-05, forward_time=0.202, loss_ctc=86.846, loss_interctc_layer6=78.178, loss_interctc_layer12=65.880, loss_interctc_layer15=60.911, loss_interctc_layer21=89.212, loss=76.205, backward_time=0.566, grad_norm=67.181, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.226, optim0_lr0=1.088e-04, train_time=3.508 +[gpub006:0/64] 2024-01-26 02:27:36,847 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub006:0/64] 2024-01-26 02:30:29,407 (trainer:753) INFO: 14epoch:train:7901-8000batch: iter_time=9.518e-05, forward_time=0.201, loss_ctc=90.243, loss_interctc_layer6=85.108, loss_interctc_layer12=70.875, loss_interctc_layer15=65.076, loss_interctc_layer21=92.534, loss=80.767, backward_time=0.475, grad_norm=63.783, clip=100.000, loss_scale=1.155e+32, optim_step_time=0.226, optim0_lr0=1.087e-04, train_time=3.060 +[gpub006:0/64] 2024-01-26 02:36:15,256 (trainer:753) INFO: 14epoch:train:8001-8100batch: iter_time=1.020e-04, forward_time=0.203, loss_ctc=79.936, loss_interctc_layer6=90.501, loss_interctc_layer12=76.414, loss_interctc_layer15=70.730, loss_interctc_layer21=81.554, loss=79.827, backward_time=0.609, grad_norm=77.397, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.226, optim0_lr0=1.087e-04, train_time=3.458 +[gpub006:0/64] 2024-01-26 02:39:54,836 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub006:0/64] 2024-01-26 02:42:19,818 (trainer:753) INFO: 14epoch:train:8101-8200batch: iter_time=1.012e-04, forward_time=0.201, loss_ctc=80.497, loss_interctc_layer6=86.736, loss_interctc_layer12=73.229, loss_interctc_layer15=67.841, loss_interctc_layer21=82.008, loss=78.062, backward_time=0.629, grad_norm=67.209, clip=100.000, loss_scale=6.556e+31, optim_step_time=0.226, optim0_lr0=1.087e-04, train_time=3.645 +[gpub006:0/64] 2024-01-26 02:47:55,005 (trainer:753) INFO: 14epoch:train:8201-8300batch: iter_time=9.475e-05, forward_time=0.202, loss_ctc=99.249, loss_interctc_layer6=103.322, loss_interctc_layer12=86.952, loss_interctc_layer15=80.571, loss_interctc_layer21=101.285, loss=94.276, backward_time=0.536, grad_norm=80.077, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.087e-04, train_time=3.352 +[gpub006:0/64] 2024-01-26 02:53:51,623 (trainer:753) INFO: 14epoch:train:8301-8400batch: iter_time=9.712e-05, forward_time=0.273, loss_ctc=100.639, loss_interctc_layer6=96.344, loss_interctc_layer12=80.946, loss_interctc_layer15=74.826, loss_interctc_layer21=103.427, loss=91.236, backward_time=0.571, grad_norm=104.936, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.231, optim0_lr0=1.086e-04, train_time=3.565 +[gpub006:0/64] 2024-01-26 02:59:22,080 (trainer:753) INFO: 14epoch:train:8401-8500batch: iter_time=0.002, forward_time=0.249, loss_ctc=91.186, loss_interctc_layer6=86.466, loss_interctc_layer12=72.245, loss_interctc_layer15=66.710, loss_interctc_layer21=93.609, loss=82.043, backward_time=0.506, grad_norm=67.835, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.229, optim0_lr0=1.086e-04, train_time=3.305 +[gpub006:0/64] 2024-01-26 03:05:28,129 (trainer:753) INFO: 14epoch:train:8501-8600batch: iter_time=1.037e-04, forward_time=0.202, loss_ctc=76.422, loss_interctc_layer6=84.070, loss_interctc_layer12=70.564, loss_interctc_layer15=65.351, loss_interctc_layer21=78.360, loss=74.953, backward_time=0.578, grad_norm=64.602, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.228, optim0_lr0=1.086e-04, train_time=3.659 +[gpub006:0/64] 2024-01-26 03:10:37,112 (trainer:753) INFO: 14epoch:train:8601-8700batch: iter_time=9.190e-05, forward_time=0.202, loss_ctc=82.699, loss_interctc_layer6=87.701, loss_interctc_layer12=74.161, loss_interctc_layer15=69.227, loss_interctc_layer21=84.885, loss=79.735, backward_time=0.495, grad_norm=80.210, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.086e-04, train_time=3.091 +[gpub006:0/64] 2024-01-26 03:13:46,898 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
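Around the two skipped updates above, the loss_scale column drops by a factor of two per skip; the logged figures are averages over each 100-batch window, which is why intermediate values such as 1.155e+32 and 6.556e+31 also appear. A quick arithmetic check (small differences are rounding of the logged values):

print(1.623e+32 / 2)  # ~8.11e+31, the scale level seen after the first skip
print(1.623e+32 / 4)  # ~4.06e+31, matching the 4.056e+31 reached after the second skip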
+[gpub006:0/64] 2024-01-26 03:14:05,456 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-26 03:14:08,955 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-26 03:14:08,955 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub006:0/64] 2024-01-26 03:14:08,958 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-26 03:27:14,621 (trainer:753) INFO: 14epoch:train:8701-8800batch: iter_time=3.754, forward_time=0.255, loss_ctc=92.781, loss_interctc_layer6=90.432, loss_interctc_layer12=76.215, loss_interctc_layer15=70.399, loss_interctc_layer21=95.133, loss=84.992, backward_time=0.544, grad_norm=121.868, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.230, optim0_lr0=1.085e-04, train_time=9.974 +[gpub006:0/64] 2024-01-26 03:32:15,734 (trainer:753) INFO: 14epoch:train:8801-8900batch: iter_time=9.009e-05, forward_time=0.202, loss_ctc=80.715, loss_interctc_layer6=91.501, loss_interctc_layer12=77.374, loss_interctc_layer15=71.727, loss_interctc_layer21=82.645, loss=80.792, backward_time=0.488, grad_norm=78.901, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.228, optim0_lr0=1.085e-04, train_time=3.012 +[gpub006:0/64] 2024-01-26 03:37:18,705 (trainer:753) INFO: 14epoch:train:8901-9000batch: iter_time=9.514e-05, forward_time=0.203, loss_ctc=80.815, loss_interctc_layer6=83.612, loss_interctc_layer12=69.932, loss_interctc_layer15=64.470, loss_interctc_layer21=82.922, loss=76.350, backward_time=0.477, grad_norm=65.101, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.085e-04, train_time=3.029 +[gpub006:0/64] 2024-01-26 03:42:57,097 (trainer:753) INFO: 14epoch:train:9001-9100batch: iter_time=9.739e-05, forward_time=0.201, loss_ctc=81.008, loss_interctc_layer6=80.537, loss_interctc_layer12=67.775, loss_interctc_layer15=62.897, loss_interctc_layer21=83.025, loss=75.049, backward_time=0.521, grad_norm=63.563, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.085e-04, train_time=3.384 +[gpub006:0/64] 2024-01-26 03:48:38,135 (trainer:753) INFO: 14epoch:train:9101-9200batch: iter_time=1.054e-04, forward_time=0.202, loss_ctc=105.363, loss_interctc_layer6=93.704, loss_interctc_layer12=78.225, loss_interctc_layer15=71.957, loss_interctc_layer21=108.597, loss=91.569, backward_time=0.568, grad_norm=94.344, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.084e-04, train_time=3.410 +[gpub006:0/64] 2024-01-26 03:54:42,308 (trainer:753) INFO: 14epoch:train:9201-9300batch: iter_time=9.720e-05, forward_time=0.201, loss_ctc=78.186, loss_interctc_layer6=80.479, loss_interctc_layer12=67.521, loss_interctc_layer15=62.235, loss_interctc_layer21=80.166, loss=73.717, backward_time=0.564, grad_norm=83.341, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.084e-04, train_time=3.642 +[gpub006:0/64] 2024-01-26 04:00:52,272 (trainer:753) INFO: 
14epoch:train:9301-9400batch: iter_time=9.617e-05, forward_time=0.202, loss_ctc=78.019, loss_interctc_layer6=90.017, loss_interctc_layer12=76.659, loss_interctc_layer15=71.518, loss_interctc_layer21=79.909, loss=79.224, backward_time=0.660, grad_norm=72.435, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.084e-04, train_time=3.699 +[gpub006:0/64] 2024-01-26 04:06:50,085 (trainer:753) INFO: 14epoch:train:9401-9500batch: iter_time=9.382e-05, forward_time=0.202, loss_ctc=90.663, loss_interctc_layer6=97.138, loss_interctc_layer12=81.703, loss_interctc_layer15=75.495, loss_interctc_layer21=92.632, loss=87.526, backward_time=0.549, grad_norm=84.627, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.083e-04, train_time=3.578 +[gpub006:0/64] 2024-01-26 04:12:22,280 (trainer:753) INFO: 14epoch:train:9501-9600batch: iter_time=9.442e-05, forward_time=0.201, loss_ctc=92.227, loss_interctc_layer6=93.794, loss_interctc_layer12=78.422, loss_interctc_layer15=72.350, loss_interctc_layer21=94.835, loss=86.326, backward_time=0.524, grad_norm=98.336, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.083e-04, train_time=3.322 +[gpub006:0/64] 2024-01-26 04:18:23,379 (trainer:753) INFO: 14epoch:train:9601-9700batch: iter_time=8.711e-05, forward_time=0.327, loss_ctc=94.654, loss_interctc_layer6=87.584, loss_interctc_layer12=73.424, loss_interctc_layer15=67.402, loss_interctc_layer21=97.162, loss=84.045, backward_time=0.578, grad_norm=89.900, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.233, optim0_lr0=1.083e-04, train_time=3.609 +[gpub006:0/64] 2024-01-26 04:24:01,257 (trainer:753) INFO: 14epoch:train:9701-9800batch: iter_time=9.637e-05, forward_time=0.223, loss_ctc=82.597, loss_interctc_layer6=85.998, loss_interctc_layer12=72.157, loss_interctc_layer15=66.854, loss_interctc_layer21=84.712, loss=78.464, backward_time=0.582, grad_norm=65.938, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.083e-04, train_time=3.379 +[gpub006:0/64] 2024-01-26 04:29:32,914 (trainer:753) INFO: 14epoch:train:9801-9900batch: iter_time=9.246e-05, forward_time=0.201, loss_ctc=76.695, loss_interctc_layer6=85.291, loss_interctc_layer12=71.579, loss_interctc_layer15=66.541, loss_interctc_layer21=78.698, loss=75.761, backward_time=0.542, grad_norm=69.553, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.082e-04, train_time=3.318 +[gpub006:0/64] 2024-01-26 04:35:31,717 (trainer:753) INFO: 14epoch:train:9901-10000batch: iter_time=8.944e-05, forward_time=0.202, loss_ctc=105.283, loss_interctc_layer6=97.902, loss_interctc_layer12=82.242, loss_interctc_layer15=76.358, loss_interctc_layer21=108.048, loss=93.967, backward_time=0.567, grad_norm=115.564, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.082e-04, train_time=3.588 +[gpub006:0/64] 2024-01-26 04:35:51,746 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub006:0/64] 2024-01-26 04:36:11,160 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub006:0/64] 2024-01-26 04:36:14,582 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub006:0/64] 2024-01-26 04:36:14,582 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub006:0/64] 2024-01-26 04:36:14,627 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub006:0/64] 2024-01-26 04:54:24,282 (trainer:753) INFO: 14epoch:train:10001-10100batch: iter_time=4.243, forward_time=0.201, loss_ctc=72.378, loss_interctc_layer6=85.375, loss_interctc_layer12=71.699, loss_interctc_layer15=65.858, loss_interctc_layer21=73.976, loss=73.857, backward_time=0.524, grad_norm=96.114, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.082e-04, train_time=11.325 +[gpub006:0/64] 2024-01-26 05:00:27,262 (trainer:753) INFO: 14epoch:train:10101-10200batch: iter_time=8.160e-05, forward_time=0.203, loss_ctc=75.564, loss_interctc_layer6=89.046, loss_interctc_layer12=75.330, loss_interctc_layer15=69.784, loss_interctc_layer21=77.510, loss=77.447, backward_time=0.581, grad_norm=79.503, clip=100.000, loss_scale=5.598e+31, optim_step_time=0.227, optim0_lr0=1.082e-04, train_time=3.630 +[gpub006:0/64] 2024-01-26 05:06:57,240 (trainer:753) INFO: 14epoch:train:10201-10300batch: iter_time=9.801e-05, forward_time=0.201, loss_ctc=82.473, loss_interctc_layer6=87.089, loss_interctc_layer12=73.058, loss_interctc_layer15=67.603, loss_interctc_layer21=84.505, loss=78.946, backward_time=0.620, grad_norm=58.338, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.081e-04, train_time=3.900 +[gpub006:0/64] 2024-01-26 05:12:46,936 (trainer:753) INFO: 14epoch:train:10301-10400batch: iter_time=9.924e-05, forward_time=0.201, loss_ctc=83.465, loss_interctc_layer6=78.014, loss_interctc_layer12=65.476, loss_interctc_layer15=60.448, loss_interctc_layer21=85.782, loss=74.637, backward_time=0.634, grad_norm=70.364, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.081e-04, train_time=3.497 +[gpub006:0/64] 2024-01-26 05:18:56,569 (trainer:753) INFO: 14epoch:train:10401-10500batch: iter_time=1.084e-04, forward_time=0.201, loss_ctc=85.798, loss_interctc_layer6=85.115, loss_interctc_layer12=70.745, loss_interctc_layer15=64.789, loss_interctc_layer21=88.526, loss=78.995, backward_time=0.639, grad_norm=65.095, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.081e-04, train_time=3.696 +[gpub006:0/64] 2024-01-26 05:25:14,447 (trainer:753) INFO: 14epoch:train:10501-10600batch: iter_time=1.018e-04, forward_time=0.204, loss_ctc=75.322, loss_interctc_layer6=90.368, loss_interctc_layer12=76.106, loss_interctc_layer15=70.473, loss_interctc_layer21=77.406, loss=77.935, backward_time=0.584, grad_norm=79.770, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.229, optim0_lr0=1.081e-04, train_time=3.779 +[gpub006:0/64] 2024-01-26 05:31:17,229 (trainer:753) INFO: 
14epoch:train:10601-10700batch: iter_time=9.403e-05, forward_time=0.202, loss_ctc=76.449, loss_interctc_layer6=85.803, loss_interctc_layer12=72.684, loss_interctc_layer15=67.148, loss_interctc_layer21=77.997, loss=76.016, backward_time=0.533, grad_norm=70.963, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.080e-04, train_time=3.628
+[gpub006:0/64] 2024-01-26 05:37:43,843 (trainer:753) INFO: 14epoch:train:10701-10800batch: iter_time=9.305e-05, forward_time=0.238, loss_ctc=96.759, loss_interctc_layer6=102.628, loss_interctc_layer12=86.163, loss_interctc_layer15=79.299, loss_interctc_layer21=98.947, loss=92.759, backward_time=0.622, grad_norm=81.201, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.229, optim0_lr0=1.080e-04, train_time=3.866
+[gpub006:0/64] 2024-01-26 05:44:07,414 (trainer:753) INFO: 14epoch:train:10801-10900batch: iter_time=9.372e-05, forward_time=0.227, loss_ctc=96.795, loss_interctc_layer6=95.169, loss_interctc_layer12=80.281, loss_interctc_layer15=74.300, loss_interctc_layer21=99.408, loss=89.190, backward_time=0.631, grad_norm=70.217, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.080e-04, train_time=3.835
+[gpub006:0/64] 2024-01-26 05:50:22,218 (trainer:753) INFO: 14epoch:train:10901-11000batch: iter_time=9.098e-05, forward_time=0.233, loss_ctc=85.799, loss_interctc_layer6=85.960, loss_interctc_layer12=71.745, loss_interctc_layer15=66.104, loss_interctc_layer21=88.394, loss=79.600, backward_time=0.576, grad_norm=80.874, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.229, optim0_lr0=1.080e-04, train_time=3.748
+[gpub006:0/64] 2024-01-26 05:55:46,365 (trainer:753) INFO: 14epoch:train:11001-11100batch: iter_time=9.731e-05, forward_time=0.201, loss_ctc=74.190, loss_interctc_layer6=83.646, loss_interctc_layer12=70.170, loss_interctc_layer15=64.807, loss_interctc_layer21=76.194, loss=73.801, backward_time=0.519, grad_norm=64.229, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.079e-04, train_time=3.242
+[gpub006:0/64] 2024-01-26 06:01:55,676 (trainer:753) INFO: 14epoch:train:11101-11200batch: iter_time=9.841e-05, forward_time=0.202, loss_ctc=78.134, loss_interctc_layer6=86.601, loss_interctc_layer12=73.454, loss_interctc_layer15=68.196, loss_interctc_layer21=80.242, loss=77.325, backward_time=0.639, grad_norm=75.667, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.079e-04, train_time=3.693
+[gpub006:0/64] 2024-01-26 06:04:55,945 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub006:0/64] 2024-01-26 06:05:14,633 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub006:0/64] 2024-01-26 06:05:18,041 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub006:0/64] 2024-01-26 06:05:18,041 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub006:0/64] 2024-01-26 06:05:18,045 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpub006:0/64] 2024-01-26 06:17:10,690 (trainer:753) INFO: 14epoch:train:11201-11300batch: iter_time=3.438, forward_time=0.204, loss_ctc=84.322, loss_interctc_layer6=90.017, loss_interctc_layer12=75.670, loss_interctc_layer15=69.877, loss_interctc_layer21=86.448, loss=81.267, backward_time=0.494, grad_norm=62.783, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.079e-04, train_time=9.150
+[gpub006:0/64] 2024-01-26 06:22:01,846 (trainer:753) INFO: 14epoch:train:11301-11400batch: iter_time=8.826e-05, forward_time=0.201, loss_ctc=74.786, loss_interctc_layer6=90.899, loss_interctc_layer12=76.542, loss_interctc_layer15=70.676, loss_interctc_layer21=76.578, loss=77.896, backward_time=0.466, grad_norm=80.635, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.078e-04, train_time=2.911
+[gpub006:0/64] 2024-01-26 06:27:48,976 (trainer:753) INFO: 14epoch:train:11401-11500batch: iter_time=9.366e-05, forward_time=0.203, loss_ctc=74.279, loss_interctc_layer6=82.921, loss_interctc_layer12=69.114, loss_interctc_layer15=63.499, loss_interctc_layer21=76.672, loss=73.297, backward_time=0.549, grad_norm=66.778, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.078e-04, train_time=3.471
+[gpub006:0/64] 2024-01-26 06:33:35,922 (trainer:753) INFO: 14epoch:train:11501-11600batch: iter_time=9.895e-05, forward_time=0.209, loss_ctc=76.482, loss_interctc_layer6=80.790, loss_interctc_layer12=68.088, loss_interctc_layer15=62.883, loss_interctc_layer21=78.112, loss=73.271, backward_time=0.534, grad_norm=71.946, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.078e-04, train_time=3.469
+[gpub006:0/64] 2024-01-26 06:39:19,768 (trainer:753) INFO: 14epoch:train:11601-11700batch: iter_time=9.713e-05, forward_time=0.240, loss_ctc=100.637, loss_interctc_layer6=93.585, loss_interctc_layer12=78.068, loss_interctc_layer15=71.748, loss_interctc_layer21=103.289, loss=89.466, backward_time=0.522, grad_norm=64.941, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.078e-04, train_time=3.438
+[gpub006:0/64] 2024-01-26 06:45:10,525 (trainer:753) INFO: 14epoch:train:11701-11800batch: iter_time=9.045e-05, forward_time=0.213, loss_ctc=73.975, loss_interctc_layer6=80.672, loss_interctc_layer12=67.501, loss_interctc_layer15=62.231, loss_interctc_layer21=75.787, loss=72.033, backward_time=0.597, grad_norm=87.941, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.077e-04, train_time=3.507
+[gpub006:0/64] 2024-01-26 06:50:33,134 (trainer:753) INFO: 14epoch:train:11801-11900batch: iter_time=8.764e-05, forward_time=0.243, loss_ctc=76.240, loss_interctc_layer6=89.870, loss_interctc_layer12=76.248, loss_interctc_layer15=70.723, loss_interctc_layer21=77.734, loss=78.163, backward_time=0.510, grad_norm=98.333, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.077e-04, train_time=3.225
+[gpub006:0/64] 2024-01-26 06:55:51,702 (trainer:753) INFO: 14epoch:train:11901-12000batch: iter_time=9.113e-05, forward_time=0.202, loss_ctc=86.663, loss_interctc_layer6=96.282, loss_interctc_layer12=80.956, loss_interctc_layer15=74.644, loss_interctc_layer21=88.647, loss=85.438, backward_time=0.508, grad_norm=91.565, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.077e-04, train_time=3.186
+[gpub006:0/64] 2024-01-26 07:01:11,981 (trainer:753) INFO: 14epoch:train:12001-12100batch: iter_time=9.326e-05, forward_time=0.202, loss_ctc=90.713, loss_interctc_layer6=93.481, loss_interctc_layer12=78.177, loss_interctc_layer15=71.944, loss_interctc_layer21=93.350, loss=85.533, backward_time=0.487, grad_norm=73.203, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.077e-04, train_time=3.204
+[gpub006:0/64] 2024-01-26 07:07:28,465 (trainer:753) INFO: 14epoch:train:12101-12200batch: iter_time=8.742e-05, forward_time=0.202, loss_ctc=90.638, loss_interctc_layer6=87.291, loss_interctc_layer12=72.789, loss_interctc_layer15=66.932, loss_interctc_layer21=93.247, loss=82.179, backward_time=0.587, grad_norm=66.309, clip=100.000, loss_scale=1.120e+32, optim_step_time=0.227, optim0_lr0=1.076e-04, train_time=3.765
+[gpub006:0/64] 2024-01-26 07:13:16,842 (trainer:753) INFO: 14epoch:train:12201-12300batch: iter_time=9.693e-05, forward_time=0.213, loss_ctc=78.533, loss_interctc_layer6=85.456, loss_interctc_layer12=71.501, loss_interctc_layer15=66.256, loss_interctc_layer21=80.420, loss=76.433, backward_time=0.531, grad_norm=66.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.076e-04, train_time=3.484
+[gpub006:0/64] 2024-01-26 07:18:28,385 (trainer:753) INFO: 14epoch:train:12301-12400batch: iter_time=9.623e-05, forward_time=0.201, loss_ctc=75.317, loss_interctc_layer6=84.746, loss_interctc_layer12=71.045, loss_interctc_layer15=65.755, loss_interctc_layer21=76.846, loss=74.742, backward_time=0.478, grad_norm=69.517, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.076e-04, train_time=3.115
+[gpub006:0/64] 2024-01-26 07:23:55,805 (trainer:753) INFO: 14epoch:train:12401-12500batch: iter_time=9.271e-05, forward_time=0.202, loss_ctc=95.379, loss_interctc_layer6=96.112, loss_interctc_layer12=81.291, loss_interctc_layer15=75.281, loss_interctc_layer21=98.022, loss=89.217, backward_time=0.505, grad_norm=78.974, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.076e-04, train_time=3.274
+[gpub006:0/64] 2024-01-26 07:24:15,835 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub006:0/64] 2024-01-26 07:24:34,374 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub006:0/64] 2024-01-26 07:24:37,759 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub006:0/64] 2024-01-26 07:24:37,759 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub006:0/64] 2024-01-26 07:24:37,815 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpub006:0/64] 2024-01-26 07:39:27,798 (trainer:753) INFO: 14epoch:train:12501-12600batch: iter_time=3.545, forward_time=0.241, loss_ctc=76.077, loss_interctc_layer6=84.557, loss_interctc_layer12=70.591, loss_interctc_layer15=65.121, loss_interctc_layer21=77.976, loss=74.864, backward_time=0.480, grad_norm=78.916, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.229, optim0_lr0=1.075e-04, train_time=9.320
+[gpub006:0/64] 2024-01-26 07:44:53,848 (trainer:753) INFO: 14epoch:train:12601-12700batch: iter_time=8.144e-05, forward_time=0.203, loss_ctc=78.949, loss_interctc_layer6=88.236, loss_interctc_layer12=74.463, loss_interctc_layer15=68.846, loss_interctc_layer21=80.619, loss=78.222, backward_time=0.505, grad_norm=81.298, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.075e-04, train_time=3.260
+[gpub006:0/64] 2024-01-26 07:50:28,906 (trainer:753) INFO: 14epoch:train:12701-12800batch: iter_time=8.110e-05, forward_time=0.201, loss_ctc=86.315, loss_interctc_layer6=85.851, loss_interctc_layer12=72.000, loss_interctc_layer15=66.484, loss_interctc_layer21=88.092, loss=79.748, backward_time=0.559, grad_norm=74.769, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.075e-04, train_time=3.350
+[gpub006:0/64] 2024-01-26 07:55:43,190 (trainer:753) INFO: 14epoch:train:12801-12900batch: iter_time=8.464e-05, forward_time=0.200, loss_ctc=86.062, loss_interctc_layer6=77.864, loss_interctc_layer12=65.254, loss_interctc_layer15=60.065, loss_interctc_layer21=88.291, loss=75.507, backward_time=0.478, grad_norm=74.571, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.075e-04, train_time=3.142
+[gpub006:0/64] 2024-01-26 08:01:27,590 (trainer:753) INFO: 14epoch:train:12901-13000batch: iter_time=8.612e-05, forward_time=0.201, loss_ctc=89.753, loss_interctc_layer6=84.447, loss_interctc_layer12=70.364, loss_interctc_layer15=64.597, loss_interctc_layer21=92.166, loss=80.265, backward_time=0.579, grad_norm=66.971, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.226, optim0_lr0=1.074e-04, train_time=3.444
+[gpub006:0/64] 2024-01-26 08:07:38,568 (trainer:753) INFO: 14epoch:train:13001-13100batch: iter_time=8.562e-05, forward_time=0.203, loss_ctc=79.501, loss_interctc_layer6=90.218, loss_interctc_layer12=75.838, loss_interctc_layer15=70.289, loss_interctc_layer21=81.503, loss=79.470, backward_time=0.594, grad_norm=71.717, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.227, optim0_lr0=1.074e-04, train_time=3.709
+[gpub006:0/64] 2024-01-26 08:09:45,458 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub006:0/64] 2024-01-26 08:13:44,161 (trainer:753) INFO: 14epoch:train:13101-13200batch: iter_time=8.916e-05, forward_time=0.200, loss_ctc=79.395, loss_interctc_layer6=86.087, loss_interctc_layer12=72.740, loss_interctc_layer15=67.822, loss_interctc_layer21=81.293, loss=77.467, backward_time=0.531, grad_norm=66.831, clip=100.000, loss_scale=1.131e+32, optim_step_time=0.226, optim0_lr0=1.074e-04, train_time=3.656
+[gpub006:0/64] 2024-01-26 08:15:56,098 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub006:0/64] 2024-01-26 08:19:26,308 (trainer:753) INFO: 14epoch:train:13201-13300batch: iter_time=9.099e-05, forward_time=0.202, loss_ctc=99.153, loss_interctc_layer6=102.488, loss_interctc_layer12=86.047, loss_interctc_layer15=79.310, loss_interctc_layer21=101.463, loss=93.692, backward_time=0.551, grad_norm=78.774, clip=100.000, loss_scale=5.532e+31, optim_step_time=0.226, optim0_lr0=1.074e-04, train_time=3.421
+[gpub006:0/64] 2024-01-26 08:24:40,061 (trainer:753) INFO: 14epoch:train:13301-13400batch: iter_time=9.933e-05, forward_time=0.201, loss_ctc=100.472, loss_interctc_layer6=94.983, loss_interctc_layer12=79.853, loss_interctc_layer15=73.803, loss_interctc_layer21=103.335, loss=90.489, backward_time=0.529, grad_norm=73.157, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.073e-04, train_time=3.137
+[gpub006:0/64] 2024-01-26 08:30:27,303 (trainer:753) INFO: 14epoch:train:13401-13500batch: iter_time=9.572e-05, forward_time=0.267, loss_ctc=89.968, loss_interctc_layer6=85.498, loss_interctc_layer12=71.253, loss_interctc_layer15=65.550, loss_interctc_layer21=92.709, loss=80.996, backward_time=0.566, grad_norm=89.389, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.233, optim0_lr0=1.073e-04, train_time=3.472
+[gpub006:0/64] 2024-01-26 08:36:18,544 (trainer:753) INFO: 14epoch:train:13501-13600batch: iter_time=9.460e-05, forward_time=0.201, loss_ctc=76.245, loss_interctc_layer6=83.798, loss_interctc_layer12=70.114, loss_interctc_layer15=64.603, loss_interctc_layer21=78.534, loss=74.659, backward_time=0.523, grad_norm=64.566, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.073e-04, train_time=3.512
+[gpub006:0/64] 2024-01-26 08:42:30,356 (trainer:753) INFO: 14epoch:train:13601-13700batch: iter_time=9.000e-05, forward_time=0.201, loss_ctc=82.097, loss_interctc_layer6=86.030, loss_interctc_layer12=72.741, loss_interctc_layer15=67.705, loss_interctc_layer21=83.977, loss=78.510, backward_time=0.585, grad_norm=73.136, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.073e-04, train_time=3.718
+[gpub006:0/64] 2024-01-26 08:45:25,025 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub006:0/64] 2024-01-26 08:45:44,057 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub006:0/64] 2024-01-26 08:45:47,484 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub006:0/64] 2024-01-26 08:45:47,484 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub006:0/64] 2024-01-26 08:45:47,571 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpub006:0/64] 2024-01-26 08:58:13,862 (trainer:753) INFO: 14epoch:train:13701-13800batch: iter_time=3.541, forward_time=0.201, loss_ctc=89.547, loss_interctc_layer6=89.038, loss_interctc_layer12=74.995, loss_interctc_layer15=69.126, loss_interctc_layer21=91.948, loss=82.931, backward_time=0.473, grad_norm=78.714, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.072e-04, train_time=9.434
+[gpub006:0/64] 2024-01-26 09:03:31,862 (trainer:753) INFO: 14epoch:train:13801-13900batch: iter_time=8.960e-05, forward_time=0.201, loss_ctc=77.218, loss_interctc_layer6=91.032, loss_interctc_layer12=76.914, loss_interctc_layer15=71.008, loss_interctc_layer21=79.029, loss=79.040, backward_time=0.567, grad_norm=94.895, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.072e-04, train_time=3.180
+[gpub006:0/64] 2024-01-26 09:08:52,358 (trainer:753) INFO: 14epoch:train:13901-14000batch: iter_time=8.895e-05, forward_time=0.200, loss_ctc=74.753, loss_interctc_layer6=82.156, loss_interctc_layer12=68.593, loss_interctc_layer15=63.116, loss_interctc_layer21=76.596, loss=73.043, backward_time=0.512, grad_norm=80.070, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.072e-04, train_time=3.205
+[gpub006:0/64] 2024-01-26 09:14:11,811 (trainer:753) INFO: 14epoch:train:14001-14100batch: iter_time=9.410e-05, forward_time=0.203, loss_ctc=76.056, loss_interctc_layer6=80.117, loss_interctc_layer12=67.284, loss_interctc_layer15=62.298, loss_interctc_layer21=77.970, loss=72.745, backward_time=0.489, grad_norm=68.927, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.071e-04, train_time=3.194
+[gpub006:0/64] 2024-01-26 09:21:06,097 (trainer:753) INFO: 14epoch:train:14101-14200batch: iter_time=9.245e-05, forward_time=0.202, loss_ctc=99.059, loss_interctc_layer6=92.608, loss_interctc_layer12=77.430, loss_interctc_layer15=71.023, loss_interctc_layer21=101.987, loss=88.422, backward_time=0.727, grad_norm=113.177, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.071e-04, train_time=4.143
+[gpub006:0/64] 2024-01-26 09:26:33,410 (trainer:753) INFO: 14epoch:train:14201-14300batch: iter_time=9.659e-05, forward_time=0.265, loss_ctc=73.199, loss_interctc_layer6=80.305, loss_interctc_layer12=67.359, loss_interctc_layer15=62.193, loss_interctc_layer21=75.289, loss=71.669, backward_time=0.503, grad_norm=64.421, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.229, optim0_lr0=1.071e-04, train_time=3.273
+[gpub006:0/64] 2024-01-26 09:32:01,622 (trainer:753) INFO: 14epoch:train:14301-14400batch: iter_time=8.880e-05, forward_time=0.265, loss_ctc=75.975, loss_interctc_layer6=89.031, loss_interctc_layer12=75.326, loss_interctc_layer15=70.202, loss_interctc_layer21=77.687, loss=77.644, backward_time=0.528, grad_norm=74.828, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.230, optim0_lr0=1.071e-04, train_time=3.281
+[gpub006:0/64] 2024-01-26 09:37:43,935 (trainer:753) INFO: 14epoch:train:14401-14500batch: iter_time=9.127e-05, forward_time=0.202, loss_ctc=86.824, loss_interctc_layer6=96.454, loss_interctc_layer12=81.113, loss_interctc_layer15=74.740, loss_interctc_layer21=88.766, loss=85.579, backward_time=0.548, grad_norm=88.172, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.070e-04, train_time=3.424
+[gpub006:0/64] 2024-01-26 09:43:58,504 (trainer:753) INFO: 14epoch:train:14501-14600batch: iter_time=1.005e-04, forward_time=0.202, loss_ctc=88.999, loss_interctc_layer6=93.395, loss_interctc_layer12=77.933, loss_interctc_layer15=71.797, loss_interctc_layer21=91.633, loss=84.752, backward_time=0.542, grad_norm=79.379, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.070e-04, train_time=3.746
+[gpub006:0/64] 2024-01-26 09:50:15,490 (trainer:753) INFO: 14epoch:train:14601-14700batch: iter_time=9.263e-05, forward_time=0.201, loss_ctc=90.203, loss_interctc_layer6=87.221, loss_interctc_layer12=72.638, loss_interctc_layer15=66.685, loss_interctc_layer21=92.765, loss=81.903, backward_time=0.564, grad_norm=66.528, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.070e-04, train_time=3.770
+[gpub006:0/64] 2024-01-26 09:55:47,584 (trainer:753) INFO: 14epoch:train:14701-14800batch: iter_time=9.365e-05, forward_time=0.208, loss_ctc=79.145, loss_interctc_layer6=85.744, loss_interctc_layer12=71.934, loss_interctc_layer15=66.261, loss_interctc_layer21=81.260, loss=76.869, backward_time=0.512, grad_norm=79.707, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.070e-04, train_time=3.320
+[gpub006:0/64] 2024-01-26 10:01:42,724 (trainer:753) INFO: 14epoch:train:14801-14900batch: iter_time=9.356e-05, forward_time=0.215, loss_ctc=74.167, loss_interctc_layer6=83.796, loss_interctc_layer12=70.558, loss_interctc_layer15=65.064, loss_interctc_layer21=75.788, loss=73.874, backward_time=0.574, grad_norm=71.069, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.069e-04, train_time=3.552
+[gpub006:0/64] 2024-01-26 10:04:18,473 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub006:0/64] 2024-01-26 10:07:20,237 (trainer:753) INFO: 14epoch:train:14901-15000batch: iter_time=1.031e-04, forward_time=0.202, loss_ctc=93.567, loss_interctc_layer6=95.565, loss_interctc_layer12=80.498, loss_interctc_layer15=74.381, loss_interctc_layer21=95.903, loss=87.983, backward_time=0.594, grad_norm=94.223, clip=100.000, loss_scale=2.848e+31, optim_step_time=0.227, optim0_lr0=1.069e-04, train_time=3.375
+[gpub006:0/64] 2024-01-26 10:44:43,743 (trainer:352) INFO: 14epoch results: [train] iter_time=0.302, forward_time=0.215, loss_ctc=85.230, loss_interctc_layer6=88.959, loss_interctc_layer12=74.881, loss_interctc_layer15=69.277, loss_interctc_layer21=87.343, loss=81.138, backward_time=0.548, grad_norm=77.766, clip=100.000, loss_scale=8.686e+31, optim_step_time=0.228, optim0_lr0=1.089e-04, train_time=3.866, time=16 hours, 6 minutes and 58.92 seconds, total_count=210000, gpu_max_cached_mem_GB=34.398, [valid] loss_ctc=53.697, cer_ctc=0.242, loss_interctc_layer6=57.022, cer_interctc_layer6=0.254, loss_interctc_layer12=44.783, cer_interctc_layer12=0.188, loss_interctc_layer15=40.328, cer_interctc_layer15=0.161, loss_interctc_layer21=56.110, cer_interctc_layer21=0.255, loss=50.388, time=36 minutes and 59.41 seconds, total_count=65394, gpu_max_cached_mem_GB=34.398
+[gpub006:0/64] 2024-01-26 10:45:04,210 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count
+[gpub006:0/64] 2024-01-26 10:45:04,349 (trainer:461) INFO: The model files were removed: exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/8epoch.pth, exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/9epoch.pth
+[gpub006:0/64] 2024-01-26 10:45:04,349 (trainer:286) INFO: 15/45epoch started. Estimated time to finish: 3 weeks, 2 days and 9 hours
+[gpub006:0/64] 2024-01-26 10:45:04,365 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub006:0/64] 2024-01-26 10:45:22,546 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub006:0/64] 2024-01-26 10:45:25,894 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub006:0/64] 2024-01-26 10:45:25,894 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub006:0/64] 2024-01-26 10:45:25,898 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpub006:0/64] 2024-01-26 10:55:01,422 (trainer:753) INFO: 15epoch:train:1-100batch: iter_time=2.423, forward_time=0.240, loss_ctc=91.107, loss_interctc_layer6=97.235, loss_interctc_layer12=82.032, loss_interctc_layer15=75.682, loss_interctc_layer21=93.312, loss=87.874, backward_time=0.473, grad_norm=131.145, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.069e-04, train_time=5.970
+[gpub006:0/64] 2024-01-26 10:59:54,077 (trainer:753) INFO: 15epoch:train:101-200batch: iter_time=8.023e-05, forward_time=0.200, loss_ctc=87.097, loss_interctc_layer6=90.584, loss_interctc_layer12=76.023, loss_interctc_layer15=70.234, loss_interctc_layer21=89.171, loss=82.622, backward_time=0.466, grad_norm=73.804, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.069e-04, train_time=2.926
+[gpub006:0/64] 2024-01-26 11:05:11,077 (trainer:753) INFO: 15epoch:train:201-300batch: iter_time=8.632e-05, forward_time=0.201, loss_ctc=91.775, loss_interctc_layer6=94.870, loss_interctc_layer12=79.412, loss_interctc_layer15=73.332, loss_interctc_layer21=93.922, loss=86.662, backward_time=0.488, grad_norm=146.182, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.068e-04, train_time=3.167
+[gpub006:0/64] 2024-01-26 11:10:35,885 (trainer:753) INFO: 15epoch:train:301-400batch: iter_time=8.691e-05, forward_time=0.202, loss_ctc=90.607, loss_interctc_layer6=102.352, loss_interctc_layer12=87.541, loss_interctc_layer15=81.625, loss_interctc_layer21=92.823, loss=90.990, backward_time=0.517, grad_norm=81.803, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.068e-04, train_time=3.250
+[gpub006:0/64] 2024-01-26 11:16:11,500 (trainer:753) INFO: 15epoch:train:401-500batch: iter_time=8.208e-05, forward_time=0.203, loss_ctc=82.903, loss_interctc_layer6=86.715, loss_interctc_layer12=73.358, loss_interctc_layer15=67.863, loss_interctc_layer21=85.067, loss=79.181, backward_time=0.529, grad_norm=77.634, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.068e-04, train_time=3.356
+[gpub006:0/64] 2024-01-26 11:22:21,150 (trainer:753) INFO: 15epoch:train:501-600batch: iter_time=8.312e-05, forward_time=0.280, loss_ctc=89.735, loss_interctc_layer6=99.088, loss_interctc_layer12=83.838, loss_interctc_layer15=77.618, loss_interctc_layer21=91.889, loss=88.434, backward_time=0.614, grad_norm=78.767, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.231, optim0_lr0=1.068e-04, train_time=3.696
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+slurmstepd: error: *** STEP 2892722.0 ON gpub006 CANCELLED AT 2024-01-26T11:26:48 ***