|
# Running on gpua006.delta.ncsa.illinois.edu |
|
# Started at Thu Feb 15 11:52:42 CST 2024 |
|
# SLURMD_NODENAME=gpua006 |
|
# SLURM_CLUSTER_NAME=delta |
|
# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf |
|
# SLURM_CPUS_ON_NODE=64 |
|
# SLURM_CPUS_PER_TASK=64 |
|
# SLURM_EXPORT_ENV=PATH |
|
# SLURM_GET_USER_ENV=1 |
|
# SLURM_GPUS_ON_NODE=4 |
|
# SLURM_GTIDS=0 |
|
# SLURM_JOBID=2991674 |
|
# SLURM_JOB_ACCOUNT=bbjs-delta-gpu |
|
# SLURM_JOB_CPUS_PER_NODE='64(x16)' |
|
# SLURM_JOB_END_TIME=1708192328 |
|
# SLURM_JOB_GID=202 |
|
# SLURM_JOB_GPUS=0,1,2,3 |
|
# SLURM_JOB_ID=2991674 |
|
# SLURM_JOB_NAME=exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.log |
|
# SLURM_JOB_NODELIST='gpua[006-007,012,016,033,038-040,049,054-055,057,079-080,085,089]' |
|
# SLURM_JOB_NUM_NODES=16 |
|
# SLURM_JOB_PARTITION=gpuA100x4 |
|
# SLURM_JOB_QOS=bbjs-delta-gpu |
|
# SLURM_JOB_RESERVATION=bbjs |
|
# SLURM_JOB_START_TIME=1708019528 |
|
# SLURM_JOB_UID=68077 |
|
# SLURM_JOB_USER=peng6 |
|
# SLURM_LOCALID=0 |
|
# SLURM_MEM_PER_NODE=240000 |
|
# SLURM_MPI_TYPE=pmi2 |
|
# SLURM_NNODES=16 |
|
# SLURM_NODEID=0 |
|
# SLURM_NODELIST='gpua[006-007,012,016,033,038-040,049,054-055,057,079-080,085,089]' |
|
# SLURM_NODE_ALIASES='(null)' |
|
# SLURM_OPEN_MODE=a |
|
# SLURM_PRIO_PROCESS=0 |
|
# SLURM_PROCID=0 |
|
# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1 |
|
# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu |
|
# SLURM_TASKS_PER_NODE='1(x16)' |
|
# SLURM_TASK_PID=976156 |
|
# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpua006 |
|
# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node |
|
# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 |
|
# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
GpuFreq=control_disabled |
|
/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_methats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
od file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0838d6cb-1c7e-4970-b085-c26d69a4c412 |
|
[gpua006:0/64] 2024-02-15 12:00:39,764 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 |
|
[gpua006:0/64] 2024-02-15 12:01:02,819 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. |
|
[gpua006:0/64] 2024-02-15 12:01:02,895 (s2t:420) INFO: Vocabulary size: 50002 |
|
[gpua006:0/64] 2024-02-15 12:01:18,961 (abs_task:1270) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True |
|
[gpua006:0/64] 2024-02-15 12:01:18,973 (abs_task:1271) INFO: Model structure: |
|
ESPnetS2TCTCModel( |
|
(frontend): DefaultFrontend( |
|
(stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) |
|
(frontend): Frontend() |
|
(logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) |
|
) |
|
(specaug): SpecAug( |
|
(freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) |
|
(time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) |
|
) |
|
(normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) |
|
(encoder): EBranchformerCTCEncoder( |
|
(embed): Conv2dSubsampling8( |
|
(conv): Sequential( |
|
(0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) |
|
(1): ReLU() |
|
(2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) |
|
(3): ReLU() |
|
(4): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) |
|
(5): ReLU() |
|
) |
|
(out): Linear(in_features=9216, out_features=1024, bias=True) |
|
(pos_enc): PositionalEncoding( |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(encoders): MultiSequential( |
|
(0): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(1): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(2): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(cross_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(3): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(4): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(5): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(cross_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(6): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(7): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(8): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(cross_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(9): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(10): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(11): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(cross_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(12): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(13): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(14): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(cross_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(15): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(16): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(17): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(cross_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(18): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(19): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(20): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(cross_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(21): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(22): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(23): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(cross_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(24): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(25): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(26): EBranchformerEncoderLayer( |
|
(attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(cgmlp): ConvolutionalGatingMLP( |
|
(channel_proj1): Sequential( |
|
(0): Linear(in_features=1024, out_features=4096, bias=True) |
|
(1): GELU(approximate='none') |
|
) |
|
(csgu): ConvolutionalSpatialGatingUnit( |
|
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) |
|
(conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(act): Identity() |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(feed_forward_macaron): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=1024, out_features=4096, bias=True) |
|
(w_2): Linear(in_features=4096, out_features=1024, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): Swish() |
|
) |
|
(norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(cross_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) |
|
(linear_out): Linear(in_features=1024, out_features=1024, bias=True) |
|
(dropout): Identity() |
|
) |
|
(norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) |
|
(merge_proj): Linear(in_features=2048, out_features=1024, bias=True) |
|
) |
|
) |
|
(after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) |
|
(conditioning_layer): Linear(in_features=50002, out_features=1024, bias=True) |
|
) |
|
(prompt_encoder): TransformerEncoder( |
|
(encoders): MultiSequential( |
|
(0): EncoderLayer( |
|
(self_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_k): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_v): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_out): Linear(in_features=512, out_features=512, bias=True) |
|
(dropout): Identity() |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=512, out_features=2048, bias=True) |
|
(w_2): Linear(in_features=2048, out_features=512, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): ReLU() |
|
) |
|
(norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) |
|
(norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(1): EncoderLayer( |
|
(self_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_k): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_v): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_out): Linear(in_features=512, out_features=512, bias=True) |
|
(dropout): Identity() |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=512, out_features=2048, bias=True) |
|
(w_2): Linear(in_features=2048, out_features=512, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): ReLU() |
|
) |
|
(norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) |
|
(norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(2): EncoderLayer( |
|
(self_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_k): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_v): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_out): Linear(in_features=512, out_features=512, bias=True) |
|
(dropout): Identity() |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=512, out_features=2048, bias=True) |
|
(w_2): Linear(in_features=2048, out_features=512, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): ReLU() |
|
) |
|
(norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) |
|
(norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(3): EncoderLayer( |
|
(self_attn): MultiHeadedAttention( |
|
(linear_q): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_k): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_v): Linear(in_features=512, out_features=512, bias=True) |
|
(linear_out): Linear(in_features=512, out_features=512, bias=True) |
|
(dropout): Identity() |
|
) |
|
(feed_forward): PositionwiseFeedForward( |
|
(w_1): Linear(in_features=512, out_features=2048, bias=True) |
|
(w_2): Linear(in_features=2048, out_features=512, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(activation): ReLU() |
|
) |
|
(norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) |
|
(norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(after_norm): LayerNorm((512,), eps=1e-12, elementwise_affine=True) |
|
) |
|
(embed): Embedding(50002, 512) |
|
(pos_enc): PositionalEncoding( |
|
(dropout): Dropout(p=0.0, inplace=False) |
|
) |
|
(embed_proj): Linear(in_features=512, out_features=1024, bias=True) |
|
(prompt_proj): Linear(in_features=512, out_features=1024, bias=True) |
|
(ctc): CTC( |
|
(ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) |
|
(ctc_loss): CTCLoss() |
|
) |
|
) |
|
|
|
Model summary: |
|
Class Name: ESPnetS2TCTCModel |
|
Total Number of model parameters: 1.01 B |
|
Number of trainable parameters: 1.01 B (100.0%) |
|
Size: 4.02 GB |
|
Type: torch.float32 |
|
[gpua006:0/64] 2024-02-15 12:01:18,973 (abs_task:1274) INFO: Optimizer: |
|
AdamW ( |
|
Parameter Group 0 |
|
amsgrad: False |
|
betas: [0.9, 0.98] |
|
capturable: False |
|
eps: 1e-06 |
|
foreach: None |
|
initial_lr: 0.0002 |
|
lr: 1.6666666666666667e-09 |
|
maximize: False |
|
weight_decay: 0.0 |
|
) |
|
[gpua006:0/64] 2024-02-15 12:01:18,974 (abs_task:1275) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0002]) |
|
[gpua006:0/64] 2024-02-15 12:01:18,976 (abs_task:1284) INFO: Saving the configuration in exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/config.yaml |
|
[gpua006:0/64] 2024-02-15 12:01:26,056 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') |
|
[gpua006:0/64] 2024-02-15 12:01:27,182 (abs_task:1660) INFO: [valid] dataset: |
|
ESPnetDataset( |
|
speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} |
|
text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} |
|
text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} |
|
text: {"path": "dump/raw/dev_v3/text", "type": "text"} |
|
preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fa9778aad10>) |
|
[gpua006:0/64] 2024-02-15 12:01:27,182 (abs_task:1661) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, |
|
[gpua006:0/64] 2024-02-15 12:01:27,191 (abs_task:1662) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 |
|
[gpua006:0/64] 2024-02-15 12:02:53,964 (trainer:167) INFO: The training was resumed using exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/checkpoint.pth |
|
gpua006:976275:976275 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.6<0> |
|
gpua006:976275:976275 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua006:976275:976275 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua006:976275:976275 [0] NCCL INFO cudaDriverVersion 12020 |
|
NCCL version 2.14.3+cuda11.7 |
|
[gpua006:0/64] 2024-02-15 12:03:18,809 (trainer:301) INFO: 44/45epoch started |
|
[gpua006:0/64] 2024-02-15 12:03:18,852 (multiple_iter_factory:32) INFO: Building 0th iter-factory... |
|
[gpua006:0/64] 2024-02-15 12:03:37,232 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') |
|
[gpua006:0/64] 2024-02-15 12:03:40,774 (abs_task:1660) INFO: [train] dataset: |
|
ESPnetDataset( |
|
speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} |
|
text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} |
|
text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} |
|
text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} |
|
preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fa7b62f5b70>) |
|
[gpua006:0/64] 2024-02-15 12:03:40,775 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, |
|
[gpua006:0/64] 2024-02-15 12:03:40,778 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 |
|
gpua049:151698:151698 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua049:151698:151698 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0> |
|
gpua049:151698:151698 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua049:151698:151698 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua049:151698:151826 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua049:151698:151826 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua049:151698:151826 [1] NCCL INFO Using network AWS Libfabric |
|
gpua049:151698:151826 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua049:151698:151826 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua049:151698:151826 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 |
|
gpua049:151698:151826 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read |
|
gpua049:151698:151826 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read |
|
gpua049:151698:151826 [1] NCCL INFO Connected all rings |
|
gpua049:151698:151826 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua049:151698:151826 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/AWS Libfabric/1 |
|
gpua049:151698:151826 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read |
|
gpua049:151698:151826 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read |
|
gpua049:151698:151826 [1] NCCL INFO Connected all trees |
|
gpua049:151698:151826 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua049:151698:151826 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua049:151698:151826 [1] NCCL INFO comm 0x5623dac13d90 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua033:1942187:1942187 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua033:1942187:1942187 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0> |
|
gpua033:1942187:1942187 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua033:1942187:1942187 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua033:1942187:1942312 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua033:1942187:1942312 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua033:1942187:1942312 [0] NCCL INFO Using network AWS Libfabric |
|
gpua033:1942187:1942312 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua033:1942187:1942312 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua033:1942187:1942312 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 |
|
gpua033:1942187:1942312 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua007:1877471:1877471 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua007:1877471:1877471 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.7<0> |
|
gpua007:1877471:1877471 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua007:1877471:1877471 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua007:1877471:1877610 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua007:1877471:1877610 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua007:1877471:1877610 [1] NCCL INFO Using network AWS Libfabric |
|
gpua007:1877471:1877610 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua007:1877471:1877610 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua007:1877471:1877610 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 |
|
gpua007:1877471:1877610 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read |
|
gpua007:1877471:1877610 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read |
|
gpua012:3029511:3029511 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua012:3029511:3029511 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.12<0> |
|
gpua012:3029511:3029511 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua012:3029511:3029511 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua012:3029511:3029602 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua012:3029511:3029602 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua012:3029511:3029602 [2] NCCL INFO Using network AWS Libfabric |
|
gpua012:3029511:3029602 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua012:3029511:3029602 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua012:3029511:3029602 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 |
|
gpua012:3029511:3029602 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read |
|
gpua012:3029511:3029602 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read |
|
gpua055:421545:421545 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua055:421545:421545 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> |
|
gpua055:421545:421545 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua055:421545:421545 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua055:421545:436565 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua055:421545:436565 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua055:421545:436565 [3] NCCL INFO Using network AWS Libfabric |
|
gpua055:421545:436565 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua055:421545:436565 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua055:421545:436565 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 |
|
gpua055:421545:436565 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/AWS Libfabric/1 |
|
gpua055:421545:436565 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/AWS Libfabric/1 |
|
gpua039:3958110:3958110 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua039:3958110:3958110 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0> |
|
gpua039:3958110:3958110 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua039:3958110:3958110 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua039:3958110:3958199 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua039:3958110:3958199 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua039:3958110:3958199 [3] NCCL INFO Using network AWS Libfabric |
|
gpua039:3958110:3958199 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua039:3958110:3958199 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua039:3958110:3958199 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 |
|
gpua039:3958110:3958199 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/AWS Libfabric/1 |
|
gpua039:3958110:3958199 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/AWS Libfabric/1 |
|
gpua049:151700:151700 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua049:151700:151700 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0> |
|
gpua049:151700:151700 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua049:151700:151700 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua049:151700:151825 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua049:151700:151825 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua049:151700:151825 [3] NCCL INFO Using network AWS Libfabric |
|
gpua049:151700:151825 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua049:151700:151825 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua049:151700:151825 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 |
|
gpua049:151700:151825 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/AWS Libfabric/1 |
|
gpua049:151700:151825 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/AWS Libfabric/1 |
|
gpua079:4011106:4011106 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua079:4011106:4011106 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.79<0> |
|
gpua079:4011106:4011106 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua079:4011106:4011106 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua079:4011106:4011246 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua079:4011106:4011246 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua079:4011106:4011246 [0] NCCL INFO Using network AWS Libfabric |
|
gpua079:4011106:4011246 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua079:4011106:4011246 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua079:4011106:4011246 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 |
|
gpua079:4011106:4011246 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua038:474862:474862 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua038:474862:474862 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.38<0> |
|
gpua038:474862:474862 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua038:474862:474862 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua038:474862:474944 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua038:474862:474944 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua038:474862:474944 [1] NCCL INFO Using network AWS Libfabric |
|
gpua038:474862:474944 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua038:474862:474944 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua038:474862:474944 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 |
|
gpua038:474862:474944 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC/read |
|
gpua038:474862:474944 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC/read |
|
gpua038:474862:474944 [1] NCCL INFO Connected all rings |
|
gpua016:879605:879605 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua016:879605:879605 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> |
|
gpua016:879605:879605 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua016:879605:879605 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua016:879605:879689 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua016:879605:879689 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua016:879605:879689 [1] NCCL INFO Using network AWS Libfabric |
|
gpua016:879605:879689 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua016:879605:879689 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua016:879605:879689 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 |
|
gpua016:879605:879689 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read |
|
gpua016:879605:879689 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read |
|
gpua016:879605:879689 [1] NCCL INFO Connected all rings |
|
gpua089:1166997:1166997 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua089:1166997:1166997 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.89<0> |
|
gpua089:1166997:1166997 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua089:1166997:1166997 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua089:1166997:1167308 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua089:1166997:1167308 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua089:1166997:1167308 [2] NCCL INFO Using network AWS Libfabric |
|
gpua089:1166997:1167308 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua089:1166997:1167308 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua089:1166997:1167308 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 |
|
gpua089:1166997:1167308 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read |
|
gpua089:1166997:1167308 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read |
|
gpua006:976275:976366 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua006:976275:976366 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua006:976275:976366 [0] NCCL INFO Using network AWS Libfabric |
|
gpua006:976275:976366 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua006:976275:976366 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua006:976275:976366 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
|
gpua006:976275:976366 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
|
gpua006:976275:976366 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 |
|
gpua006:976275:976366 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua006:976275:976366 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua006:976275:976366 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read |
|
gpua085:120732:120732 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua085:120732:120732 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.85<0> |
|
gpua085:120732:120732 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua085:120732:120732 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua085:120732:120822 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua085:120732:120822 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua085:120732:120822 [0] NCCL INFO Using network AWS Libfabric |
|
gpua085:120732:120822 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua085:120732:120822 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua085:120732:120822 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 |
|
gpua085:120732:120822 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua085:120732:120822 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua057:4182114:4182114 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua057:4182114:4182114 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0> |
|
gpua057:4182114:4182114 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua057:4182114:4182114 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua057:4182114:4182220 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua057:4182114:4182220 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua057:4182114:4182220 [2] NCCL INFO Using network AWS Libfabric |
|
gpua057:4182114:4182220 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua057:4182114:4182220 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua057:4182114:4182220 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 |
|
gpua057:4182114:4182220 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read |
|
gpua057:4182114:4182220 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read |
|
gpua040:4155648:4155648 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua040:4155648:4155648 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0> |
|
gpua040:4155648:4155648 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua040:4155648:4155648 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua040:4155648:4155739 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua040:4155648:4155739 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua040:4155648:4155739 [2] NCCL INFO Using network AWS Libfabric |
|
gpua040:4155648:4155739 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua040:4155648:4155739 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua040:4155648:4155739 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 |
|
gpua040:4155648:4155739 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read |
|
gpua040:4155648:4155739 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read |
|
gpua080:3566292:3566292 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua080:3566292:3566292 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.80<0> |
|
gpua080:3566292:3566292 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua080:3566292:3566292 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua080:3566292:3566382 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua080:3566292:3566382 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua080:3566292:3566382 [3] NCCL INFO Using network AWS Libfabric |
|
gpua080:3566292:3566382 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua080:3566292:3566382 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua080:3566292:3566382 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 |
|
gpua080:3566292:3566382 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/AWS Libfabric/1 |
|
gpua080:3566292:3566382 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/AWS Libfabric/1 |
|
gpua054:719588:719588 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua054:719588:719588 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0> |
|
gpua054:719588:719588 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua054:719588:719588 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua054:719588:719718 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua054:719588:719718 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua054:719588:719718 [1] NCCL INFO Using network AWS Libfabric |
|
gpua054:719588:719718 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua054:719588:719718 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua054:719588:719718 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 |
|
gpua054:719588:719718 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read |
|
gpua054:719588:719718 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read |
|
gpua054:719588:719718 [1] NCCL INFO Connected all rings |
|
gpua033:1942187:1942312 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua033:1942187:1942312 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read |
|
gpua033:1942187:1942312 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read |
|
gpua033:1942187:1942312 [0] NCCL INFO Connected all rings |
|
gpua033:1942187:1942312 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/AWS Libfabric/1 |
|
gpua033:1942187:1942312 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/AWS Libfabric/1 |
|
gpua033:1942187:1942312 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/AWS Libfabric/1 |
|
gpua033:1942187:1942312 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua033:1942187:1942312 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua033:1942187:1942312 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua007:1877471:1877610 [1] NCCL INFO Connected all rings |
|
gpua007:1877471:1877610 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/AWS Libfabric/1 |
|
gpua007:1877471:1877610 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua007:1877471:1877610 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read |
|
gpua007:1877471:1877610 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read |
|
gpua007:1877471:1877610 [1] NCCL INFO Connected all trees |
|
gpua007:1877471:1877610 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua007:1877471:1877610 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua007:1877471:1877610 [1] NCCL INFO comm 0x5572e6f75260 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua012:3029511:3029602 [2] NCCL INFO Connected all rings |
|
gpua012:3029511:3029602 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read |
|
gpua012:3029511:3029602 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read |
|
gpua012:3029511:3029602 [2] NCCL INFO Connected all trees |
|
gpua012:3029511:3029602 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua012:3029511:3029602 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua012:3029511:3029602 [2] NCCL INFO comm 0x563293232be0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua055:421545:436565 [3] NCCL INFO Connected all rings |
|
gpua055:421545:436565 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC/read |
|
gpua055:421545:436565 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC/read |
|
gpua055:421545:436565 [3] NCCL INFO Connected all trees |
|
gpua055:421545:436565 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua055:421545:436565 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua055:421545:436565 [3] NCCL INFO comm 0x5617cc287110 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua039:3958110:3958199 [3] NCCL INFO Connected all rings |
|
gpua039:3958110:3958199 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC/read |
|
gpua039:3958110:3958199 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC/read |
|
gpua039:3958110:3958199 [3] NCCL INFO Connected all trees |
|
gpua039:3958110:3958199 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua039:3958110:3958199 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua039:3958110:3958199 [3] NCCL INFO comm 0x55d87bf6d410 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua049:151700:151825 [3] NCCL INFO Connected all rings |
|
gpua049:151700:151825 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read |
|
gpua049:151700:151825 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read |
|
gpua049:151700:151825 [3] NCCL INFO Connected all trees |
|
gpua049:151700:151825 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua049:151700:151825 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua049:151700:151825 [3] NCCL INFO comm 0x555aad91f7e0 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua079:4011106:4011246 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua079:4011106:4011246 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC/read |
|
gpua079:4011106:4011246 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC/read |
|
gpua079:4011106:4011246 [0] NCCL INFO Connected all rings |
|
gpua079:4011106:4011246 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/AWS Libfabric/1 |
|
gpua079:4011106:4011246 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/AWS Libfabric/1 |
|
gpua079:4011106:4011246 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua079:4011106:4011246 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/AWS Libfabric/1 |
|
gpua079:4011106:4011246 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua079:4011106:4011246 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua038:474862:474944 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/AWS Libfabric/1 |
|
gpua038:474862:474944 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua038:474862:474944 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC/read |
|
gpua038:474862:474944 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC/read |
|
gpua038:474862:474944 [1] NCCL INFO Connected all trees |
|
gpua038:474862:474944 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua038:474862:474944 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua038:474862:474944 [1] NCCL INFO comm 0x55b238cb59e0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua016:879605:879689 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/AWS Libfabric/1 |
|
gpua016:879605:879689 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua016:879605:879689 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read |
|
gpua016:879605:879689 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read |
|
gpua016:879605:879689 [1] NCCL INFO Connected all trees |
|
gpua016:879605:879689 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua016:879605:879689 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua016:879605:879689 [1] NCCL INFO comm 0x55f84cd7d220 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua016:879607:879607 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua016:879607:879607 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> |
|
gpua016:879607:879607 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua089:1166997:1167308 [2] NCCL INFO Connected all rings |
|
gpua089:1166997:1167308 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read |
|
gpua089:1166997:1167308 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read |
|
gpua089:1166997:1167308 [2] NCCL INFO Connected all trees |
|
gpua089:1166997:1167308 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua089:1166997:1167308 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua089:1166997:1167308 [2] NCCL INFO comm 0x557deaf40ee0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua006:976275:976366 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read |
|
gpua006:976275:976366 [0] NCCL INFO Connected all rings |
|
gpua006:976275:976366 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/AWS Libfabric/1 |
|
gpua006:976275:976366 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua006:976275:976366 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/AWS Libfabric/1 |
|
gpua006:976275:976366 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua006:976275:976366 [0] NCCL INFO Connected all trees |
|
gpua006:976275:976366 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua006:976275:976366 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua006:976275:976366 [0] NCCL INFO comm 0x55e162d728e0 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua085:120732:120822 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read |
|
gpua085:120732:120822 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read |
|
gpua085:120732:120822 [0] NCCL INFO Connected all rings |
|
gpua085:120732:120822 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua085:120732:120822 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/AWS Libfabric/1 |
|
gpua085:120732:120822 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua085:120732:120822 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/AWS Libfabric/1 |
|
gpua085:120732:120822 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua085:120732:120822 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/AWS Libfabric/1 |
|
gpua085:120732:120822 [0] NCCL INFO Connected all trees |
|
gpua085:120732:120822 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua057:4182114:4182220 [2] NCCL INFO Connected all rings |
|
gpua057:4182114:4182220 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read |
|
gpua057:4182114:4182220 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read |
|
gpua057:4182114:4182220 [2] NCCL INFO Connected all trees |
|
gpua057:4182114:4182220 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua057:4182114:4182220 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua057:4182114:4182220 [2] NCCL INFO comm 0x561ec8413430 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua040:4155648:4155739 [2] NCCL INFO Connected all rings |
|
gpua040:4155648:4155739 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read |
|
gpua040:4155648:4155739 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read |
|
gpua040:4155648:4155739 [2] NCCL INFO Connected all trees |
|
gpua040:4155648:4155739 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua040:4155648:4155739 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua040:4155648:4155739 [2] NCCL INFO comm 0x555fc236f280 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua080:3566292:3566382 [3] NCCL INFO Connected all rings |
|
gpua080:3566292:3566382 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC/read |
|
gpua080:3566292:3566382 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC/read |
|
gpua080:3566292:3566382 [3] NCCL INFO Connected all trees |
|
gpua080:3566292:3566382 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua080:3566292:3566382 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua080:3566292:3566382 [3] NCCL INFO comm 0x55f6f137f630 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua054:719588:719718 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/AWS Libfabric/1 |
|
gpua054:719588:719718 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua054:719588:719718 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read |
|
gpua054:719588:719718 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read |
|
gpua054:719588:719718 [1] NCCL INFO Connected all trees |
|
gpua054:719588:719718 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua054:719588:719718 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua054:719588:719718 [1] NCCL INFO comm 0x5580f5821c20 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua033:1942187:1942312 [0] NCCL INFO Connected all trees |
|
gpua033:1942187:1942312 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua033:1942187:1942312 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua033:1942187:1942312 [0] NCCL INFO comm 0x56190bef37d0 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua007:1877470:1877470 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua007:1877470:1877470 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.7<0> |
|
gpua007:1877470:1877470 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua007:1877470:1877470 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua007:1877470:1877609 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua007:1877470:1877609 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua007:1877470:1877609 [0] NCCL INFO Using network AWS Libfabric |
|
gpua007:1877470:1877609 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua007:1877470:1877609 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua007:1877470:1877609 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 |
|
gpua007:1877470:1877609 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua007:1877470:1877609 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua012:3029509:3029509 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua012:3029509:3029509 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.12<0> |
|
gpua012:3029509:3029509 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua012:3029509:3029509 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua012:3029509:3029603 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua012:3029509:3029603 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua012:3029509:3029603 [0] NCCL INFO Using network AWS Libfabric |
|
gpua012:3029509:3029603 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua012:3029509:3029603 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua012:3029509:3029603 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 |
|
gpua012:3029509:3029603 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua012:3029509:3029603 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua055:421543:421543 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua055:421543:421543 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> |
|
gpua055:421543:421543 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua055:421543:421543 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua055:421543:436564 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua055:421543:436564 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua055:421543:436564 [1] NCCL INFO Using network AWS Libfabric |
|
gpua055:421543:436564 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua055:421543:436564 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua055:421543:436564 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 |
|
gpua055:421543:436564 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC/read |
|
gpua055:421543:436564 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC/read |
|
gpua055:421543:436564 [1] NCCL INFO Connected all rings |
|
gpua039:3958108:3958108 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua039:3958108:3958108 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0> |
|
gpua039:3958108:3958108 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua039:3958108:3958108 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua039:3958108:3958197 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua039:3958108:3958197 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua039:3958108:3958197 [1] NCCL INFO Using network AWS Libfabric |
|
gpua039:3958108:3958197 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua039:3958108:3958197 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua039:3958108:3958197 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 |
|
gpua039:3958108:3958197 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC/read |
|
gpua039:3958108:3958197 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC/read |
|
gpua049:151697:151697 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua049:151697:151697 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0> |
|
gpua049:151697:151697 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua049:151697:151697 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua049:151697:151824 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua049:151697:151824 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua049:151697:151824 [0] NCCL INFO Using network AWS Libfabric |
|
gpua049:151697:151824 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua049:151697:151824 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua049:151697:151824 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 |
|
gpua049:151697:151824 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua049:151697:151824 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua079:4011106:4011246 [0] NCCL INFO Connected all trees |
|
gpua079:4011106:4011246 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua079:4011106:4011246 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua079:4011106:4011246 [0] NCCL INFO comm 0x56504a976be0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua038:474863:474863 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua038:474863:474863 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.38<0> |
|
gpua038:474863:474863 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua038:474863:474863 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua038:474863:474947 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua038:474863:474947 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua038:474863:474947 [2] NCCL INFO Using network AWS Libfabric |
|
gpua038:474863:474947 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua038:474863:474947 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua038:474863:474947 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 |
|
gpua038:474863:474947 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC/read |
|
gpua038:474863:474947 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC/read |
|
gpua038:474863:474947 [2] NCCL INFO Connected all rings |
|
gpua016:879607:879607 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua016:879607:879688 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua016:879607:879688 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua016:879607:879688 [3] NCCL INFO Using network AWS Libfabric |
|
gpua016:879607:879688 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua016:879607:879688 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua016:879607:879688 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 |
|
gpua016:879607:879688 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/AWS Libfabric/1 |
|
gpua016:879607:879688 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/AWS Libfabric/1 |
|
gpua016:879607:879688 [3] NCCL INFO Connected all rings |
|
gpua016:879607:879688 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read |
|
gpua016:879607:879688 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read |
|
gpua089:1166995:1166995 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua089:1166995:1166995 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.89<0> |
|
gpua089:1166995:1166995 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua089:1166995:1166995 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua089:1166995:1167306 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua089:1166995:1167306 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua089:1166995:1167306 [0] NCCL INFO Using network AWS Libfabric |
|
gpua089:1166995:1167306 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua089:1166995:1167306 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua089:1166995:1167306 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 |
|
gpua089:1166995:1167306 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua006:976278:976278 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua006:976278:976278 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.6<0> |
|
gpua006:976278:976278 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua006:976278:976278 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua006:976278:976364 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua006:976278:976364 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua006:976278:976364 [3] NCCL INFO Using network AWS Libfabric |
|
gpua006:976278:976364 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua006:976278:976364 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua006:976278:976364 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 |
|
gpua006:976278:976364 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/AWS Libfabric/1 |
|
gpua006:976278:976364 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/AWS Libfabric/1 |
|
gpua085:120732:120822 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua085:120732:120822 [0] NCCL INFO comm 0x5577a1c0e150 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua057:4182113:4182113 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua057:4182113:4182113 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0> |
|
gpua057:4182113:4182113 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua057:4182113:4182113 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua057:4182113:4182222 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua057:4182113:4182222 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua057:4182113:4182222 [1] NCCL INFO Using network AWS Libfabric |
|
gpua057:4182113:4182222 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua057:4182113:4182222 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua057:4182113:4182222 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 |
|
gpua057:4182113:4182222 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read |
|
gpua057:4182113:4182222 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read |
|
gpua040:4155647:4155647 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua040:4155647:4155647 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0> |
|
gpua040:4155647:4155647 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua040:4155647:4155647 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua040:4155647:4155740 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua040:4155647:4155740 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua040:4155647:4155740 [1] NCCL INFO Using network AWS Libfabric |
|
gpua040:4155647:4155740 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua040:4155647:4155740 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua040:4155647:4155740 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 |
|
gpua040:4155647:4155740 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read |
|
gpua040:4155647:4155740 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read |
|
gpua080:3566291:3566291 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua080:3566291:3566291 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.80<0> |
|
gpua080:3566291:3566291 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua080:3566291:3566291 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua080:3566291:3566381 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua080:3566291:3566381 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua080:3566291:3566381 [2] NCCL INFO Using network AWS Libfabric |
|
gpua080:3566291:3566381 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua080:3566291:3566381 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua080:3566291:3566381 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 |
|
gpua080:3566291:3566381 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC/read |
|
gpua080:3566291:3566381 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC/read |
|
gpua054:719587:719587 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua054:719587:719587 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0> |
|
gpua054:719587:719587 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua054:719587:719587 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua054:719587:719716 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua054:719587:719716 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua054:719587:719716 [0] NCCL INFO Using network AWS Libfabric |
|
gpua054:719587:719716 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua054:719587:719716 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua054:719587:719716 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 |
|
gpua054:719587:719716 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua054:719587:719716 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua033:1942190:1942190 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua033:1942190:1942190 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0> |
|
gpua033:1942190:1942190 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua033:1942190:1942190 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua033:1942190:1942311 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua033:1942190:1942311 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua033:1942190:1942311 [3] NCCL INFO Using network AWS Libfabric |
|
gpua033:1942190:1942311 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua033:1942190:1942311 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua033:1942190:1942311 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 |
|
gpua033:1942190:1942311 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/AWS Libfabric/1 |
|
gpua033:1942190:1942311 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/AWS Libfabric/1 |
|
gpua007:1877470:1877609 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read |
|
gpua007:1877470:1877609 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC/read |
|
gpua007:1877470:1877609 [0] NCCL INFO Connected all rings |
|
gpua007:1877470:1877609 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua007:1877470:1877609 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/AWS Libfabric/1 |
|
gpua007:1877470:1877609 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/AWS Libfabric/1 |
|
gpua007:1877470:1877609 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua007:1877470:1877609 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua007:1877470:1877609 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/AWS Libfabric/1 |
|
gpua007:1877470:1877609 [0] NCCL INFO Connected all trees |
|
gpua007:1877470:1877609 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua012:3029509:3029603 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read |
|
gpua012:3029509:3029603 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read |
|
gpua012:3029509:3029603 [0] NCCL INFO Connected all rings |
|
gpua012:3029509:3029603 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua012:3029509:3029603 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/AWS Libfabric/1 |
|
gpua012:3029509:3029603 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/AWS Libfabric/1 |
|
gpua012:3029509:3029603 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua012:3029509:3029603 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua012:3029509:3029603 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/AWS Libfabric/1 |
|
gpua012:3029509:3029603 [0] NCCL INFO Connected all trees |
|
gpua012:3029509:3029603 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua055:421543:436564 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua055:421543:436564 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/AWS Libfabric/1 |
|
gpua055:421543:436564 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC/read |
|
gpua055:421543:436564 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC/read |
|
gpua055:421543:436564 [1] NCCL INFO Connected all trees |
|
gpua055:421543:436564 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua055:421543:436564 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua055:421543:436564 [1] NCCL INFO comm 0x559505c28f50 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua039:3958108:3958197 [1] NCCL INFO Connected all rings |
|
gpua039:3958108:3958197 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua039:3958108:3958197 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/AWS Libfabric/1 |
|
gpua039:3958108:3958197 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC/read |
|
gpua039:3958108:3958197 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC/read |
|
gpua039:3958108:3958197 [1] NCCL INFO Connected all trees |
|
gpua039:3958108:3958197 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua039:3958108:3958197 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua039:3958108:3958197 [1] NCCL INFO comm 0x558092361c20 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua049:151697:151824 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read |
|
gpua049:151697:151824 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read |
|
gpua049:151697:151824 [0] NCCL INFO Connected all rings |
|
gpua049:151697:151824 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/AWS Libfabric/1 |
|
gpua049:151697:151824 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/AWS Libfabric/1 |
|
gpua049:151697:151824 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua049:151697:151824 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/AWS Libfabric/1 |
|
gpua049:151697:151824 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua049:151697:151824 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua049:151697:151824 [0] NCCL INFO Connected all trees |
|
gpua049:151697:151824 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua079:4011109:4011109 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua079:4011109:4011109 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.79<0> |
|
gpua079:4011109:4011109 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua079:4011109:4011109 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua079:4011109:4011244 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua079:4011109:4011244 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua079:4011109:4011244 [3] NCCL INFO Using network AWS Libfabric |
|
gpua079:4011109:4011244 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua079:4011109:4011244 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua079:4011109:4011244 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 |
|
gpua079:4011109:4011244 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/AWS Libfabric/1 |
|
gpua079:4011109:4011244 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/AWS Libfabric/1 |
|
gpua038:474863:474947 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC/read |
|
gpua038:474863:474947 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC/read |
|
gpua038:474863:474947 [2] NCCL INFO Connected all trees |
|
gpua038:474863:474947 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua038:474863:474947 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua038:474863:474947 [2] NCCL INFO comm 0x55e0352774d0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua016:879607:879688 [3] NCCL INFO Connected all trees |
|
gpua016:879607:879688 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua016:879607:879688 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua016:879607:879688 [3] NCCL INFO comm 0x563a0300a7e0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua089:1166995:1167306 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua089:1166995:1167306 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read |
|
gpua089:1166995:1167306 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read |
|
gpua089:1166995:1167306 [0] NCCL INFO Connected all rings |
|
gpua089:1166995:1167306 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua089:1166995:1167306 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua089:1166995:1167306 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/AWS Libfabric/1 |
|
gpua089:1166995:1167306 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/AWS Libfabric/1 |
|
gpua089:1166995:1167306 [0] NCCL INFO Connected all trees |
|
gpua089:1166995:1167306 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua089:1166995:1167306 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua006:976278:976364 [3] NCCL INFO Connected all rings |
|
gpua006:976278:976364 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read |
|
gpua006:976278:976364 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read |
|
gpua006:976278:976364 [3] NCCL INFO Connected all trees |
|
gpua006:976278:976364 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua006:976278:976364 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua006:976278:976364 [3] NCCL INFO comm 0x56023d21c6e0 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua085:120735:120735 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua085:120735:120735 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.85<0> |
|
gpua085:120735:120735 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua085:120735:120735 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua085:120735:120824 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua085:120735:120824 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua085:120735:120824 [3] NCCL INFO Using network AWS Libfabric |
|
gpua085:120735:120824 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua085:120735:120824 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua085:120735:120824 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 |
|
gpua085:120735:120824 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/AWS Libfabric/1 |
|
gpua085:120735:120824 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/AWS Libfabric/1 |
|
gpua057:4182113:4182222 [1] NCCL INFO Connected all rings |
|
gpua057:4182113:4182222 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/AWS Libfabric/1 |
|
gpua057:4182113:4182222 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua057:4182113:4182222 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read |
|
gpua057:4182113:4182222 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read |
|
gpua057:4182113:4182222 [1] NCCL INFO Connected all trees |
|
gpua057:4182113:4182222 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua057:4182113:4182222 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua057:4182113:4182222 [1] NCCL INFO comm 0x557d12aaefd0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua057:4182115:4182115 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua057:4182115:4182115 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0> |
|
gpua040:4155647:4155740 [1] NCCL INFO Connected all rings |
|
gpua040:4155647:4155740 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/AWS Libfabric/1 |
|
gpua040:4155647:4155740 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua040:4155647:4155740 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read |
|
gpua040:4155647:4155740 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read |
|
gpua040:4155647:4155740 [1] NCCL INFO Connected all trees |
|
gpua040:4155647:4155740 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua040:4155647:4155740 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua040:4155647:4155740 [1] NCCL INFO comm 0x558ea55b2320 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua080:3566291:3566381 [2] NCCL INFO Connected all rings |
|
gpua080:3566291:3566381 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC/read |
|
gpua080:3566291:3566381 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC/read |
|
gpua080:3566291:3566381 [2] NCCL INFO Connected all trees |
|
gpua080:3566291:3566381 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua080:3566291:3566381 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua080:3566291:3566381 [2] NCCL INFO comm 0x55c0439085b0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua054:719587:719716 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read |
|
gpua054:719587:719716 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read |
|
gpua054:719587:719716 [0] NCCL INFO Connected all rings |
|
gpua054:719587:719716 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua054:719587:719716 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/AWS Libfabric/1 |
|
gpua054:719587:719716 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/AWS Libfabric/1 |
|
gpua054:719587:719716 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua054:719587:719716 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua054:719587:719716 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/AWS Libfabric/1 |
|
gpua054:719587:719716 [0] NCCL INFO Connected all trees |
|
gpua054:719587:719716 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua033:1942190:1942311 [3] NCCL INFO Connected all rings |
|
gpua033:1942190:1942311 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read |
|
gpua033:1942190:1942311 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read |
|
gpua033:1942190:1942311 [3] NCCL INFO Connected all trees |
|
gpua033:1942190:1942311 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua033:1942190:1942311 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua033:1942190:1942311 [3] NCCL INFO comm 0x55fdb8a5ac60 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua007:1877470:1877609 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua007:1877470:1877609 [0] NCCL INFO comm 0x560bd5f339a0 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua012:3029509:3029603 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua012:3029509:3029603 [0] NCCL INFO comm 0x564492351e50 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua055:421544:421544 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua055:421544:421544 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> |
|
gpua055:421544:421544 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua055:421544:421544 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua055:421544:436567 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua055:421544:436567 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua055:421544:436567 [2] NCCL INFO Using network AWS Libfabric |
|
gpua055:421544:436567 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua055:421544:436567 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua055:421544:436567 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 |
|
gpua055:421544:436567 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC/read |
|
gpua055:421544:436567 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC/read |
|
gpua055:421544:436567 [2] NCCL INFO Connected all rings |
|
gpua039:3958109:3958109 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua039:3958109:3958109 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0> |
|
gpua039:3958109:3958109 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua039:3958109:3958109 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua039:3958109:3958198 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua039:3958109:3958198 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua039:3958109:3958198 [2] NCCL INFO Using network AWS Libfabric |
|
gpua039:3958109:3958198 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua039:3958109:3958198 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua039:3958109:3958198 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 |
|
gpua039:3958109:3958198 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC/read |
|
gpua039:3958109:3958198 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC/read |
|
gpua049:151697:151824 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua049:151697:151824 [0] NCCL INFO comm 0x55feb3e4d740 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua079:4011109:4011244 [3] NCCL INFO Connected all rings |
|
gpua079:4011109:4011244 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC/read |
|
gpua079:4011109:4011244 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC/read |
|
gpua079:4011109:4011244 [3] NCCL INFO Connected all trees |
|
gpua079:4011109:4011244 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua079:4011109:4011244 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua079:4011109:4011244 [3] NCCL INFO comm 0x5606001058e0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua038:474861:474861 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua038:474861:474861 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.38<0> |
|
gpua038:474861:474861 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua038:474861:474861 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua038:474861:474946 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua038:474861:474946 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua038:474861:474946 [0] NCCL INFO Using network AWS Libfabric |
|
gpua038:474861:474946 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua038:474861:474946 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua038:474861:474946 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 |
|
gpua038:474861:474946 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua038:474861:474946 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua016:879604:879604 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua016:879604:879604 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> |
|
gpua016:879604:879604 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua016:879604:879604 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua016:879604:879690 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua016:879604:879690 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua016:879604:879690 [0] NCCL INFO Using network AWS Libfabric |
|
gpua016:879604:879690 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua016:879604:879690 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua016:879604:879690 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 |
|
gpua016:879604:879690 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua016:879604:879690 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua089:1166995:1167306 [0] NCCL INFO comm 0x560b92eace10 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua006:976276:976276 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua006:976276:976276 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.6<0> |
|
gpua006:976276:976276 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua006:976276:976276 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua006:976276:976367 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua006:976276:976367 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua006:976276:976367 [1] NCCL INFO Using network AWS Libfabric |
|
gpua006:976276:976367 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua006:976276:976367 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua006:976276:976367 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 |
|
gpua006:976276:976367 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read |
|
gpua006:976276:976367 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read |
|
gpua006:976276:976367 [1] NCCL INFO Connected all rings |
|
gpua085:120735:120824 [3] NCCL INFO Connected all rings |
|
gpua085:120735:120824 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read |
|
gpua085:120735:120824 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read |
|
gpua085:120735:120824 [3] NCCL INFO Connected all trees |
|
gpua085:120735:120824 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua085:120735:120824 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua085:120735:120824 [3] NCCL INFO comm 0x56070f183a80 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua057:4182115:4182115 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua057:4182115:4182115 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua057:4182115:4182221 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua057:4182115:4182221 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua057:4182115:4182221 [3] NCCL INFO Using network AWS Libfabric |
|
gpua057:4182115:4182221 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua057:4182115:4182221 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua057:4182115:4182221 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 |
|
gpua057:4182115:4182221 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/AWS Libfabric/1 |
|
gpua057:4182115:4182221 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/AWS Libfabric/1 |
|
gpua057:4182115:4182221 [3] NCCL INFO Connected all rings |
|
gpua057:4182115:4182221 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read |
|
gpua040:4155649:4155649 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua040:4155649:4155649 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0> |
|
gpua040:4155649:4155649 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua040:4155649:4155649 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua040:4155649:4155738 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua040:4155649:4155738 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua040:4155649:4155738 [3] NCCL INFO Using network AWS Libfabric |
|
gpua040:4155649:4155738 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua040:4155649:4155738 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua040:4155649:4155738 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 |
|
gpua040:4155649:4155738 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/AWS Libfabric/1 |
|
gpua040:4155649:4155738 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/AWS Libfabric/1 |
|
gpua080:3566289:3566289 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua080:3566289:3566289 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.80<0> |
|
gpua080:3566289:3566289 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua080:3566289:3566289 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua080:3566289:3566380 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua080:3566289:3566380 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua080:3566289:3566380 [0] NCCL INFO Using network AWS Libfabric |
|
gpua080:3566289:3566380 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua080:3566289:3566380 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua080:3566289:3566380 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 |
|
gpua080:3566289:3566380 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua054:719587:719716 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua054:719587:719716 [0] NCCL INFO comm 0x5559df580ee0 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua033:1942189:1942189 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua033:1942189:1942189 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0> |
|
gpua033:1942189:1942189 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua033:1942189:1942189 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua033:1942189:1942313 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua033:1942189:1942313 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua033:1942189:1942313 [2] NCCL INFO Using network AWS Libfabric |
|
gpua033:1942189:1942313 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua033:1942189:1942313 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua033:1942189:1942313 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 |
|
gpua033:1942189:1942313 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read |
|
gpua033:1942189:1942313 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read |
|
gpua007:1877472:1877472 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua007:1877472:1877472 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.7<0> |
|
gpua007:1877472:1877472 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua007:1877472:1877472 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua007:1877472:1877611 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua007:1877472:1877611 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua007:1877472:1877611 [2] NCCL INFO Using network AWS Libfabric |
|
gpua007:1877472:1877611 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua007:1877472:1877611 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua007:1877472:1877611 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 |
|
gpua007:1877472:1877611 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read |
|
gpua007:1877472:1877611 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read |
|
gpua012:3029512:3029512 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua012:3029512:3029512 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.12<0> |
|
gpua012:3029512:3029512 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua012:3029512:3029512 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua012:3029512:3029601 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua012:3029512:3029601 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua012:3029512:3029601 [3] NCCL INFO Using network AWS Libfabric |
|
gpua012:3029512:3029601 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua012:3029512:3029601 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua012:3029512:3029601 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 |
|
gpua012:3029512:3029601 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/AWS Libfabric/1 |
|
gpua012:3029512:3029601 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/AWS Libfabric/1 |
|
gpua055:421544:436567 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC/read |
|
gpua055:421544:436567 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC/read |
|
gpua055:421544:436567 [2] NCCL INFO Connected all trees |
|
gpua055:421544:436567 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua055:421544:436567 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua055:421544:436567 [2] NCCL INFO comm 0x5595a5720150 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua039:3958109:3958198 [2] NCCL INFO Connected all rings |
|
gpua039:3958109:3958198 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC/read |
|
gpua039:3958109:3958198 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC/read |
|
gpua039:3958109:3958198 [2] NCCL INFO Connected all trees |
|
gpua039:3958109:3958198 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua039:3958109:3958198 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua039:3958109:3958198 [2] NCCL INFO comm 0x5578d383c850 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua039:3958107:3958107 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua039:3958107:3958107 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0> |
|
gpua039:3958107:3958107 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua039:3958107:3958107 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua039:3958107:3958196 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua049:151699:151699 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua049:151699:151699 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0> |
|
gpua049:151699:151699 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua049:151699:151699 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua049:151699:151827 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua049:151699:151827 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua049:151699:151827 [2] NCCL INFO Using network AWS Libfabric |
|
gpua049:151699:151827 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua049:151699:151827 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua049:151699:151827 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 |
|
gpua049:151699:151827 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read |
|
gpua049:151699:151827 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read |
|
gpua049:151699:151827 [2] NCCL INFO Connected all rings |
|
gpua079:4011107:4011107 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua079:4011107:4011107 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.79<0> |
|
gpua079:4011107:4011107 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua079:4011107:4011107 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua079:4011107:4011243 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua079:4011107:4011243 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua079:4011107:4011243 [1] NCCL INFO Using network AWS Libfabric |
|
gpua079:4011107:4011243 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua079:4011107:4011243 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua079:4011107:4011243 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 |
|
gpua079:4011107:4011243 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC/read |
|
gpua079:4011107:4011243 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC/read |
|
gpua038:474861:474946 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC/read |
|
gpua038:474861:474946 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC/read |
|
gpua038:474861:474946 [0] NCCL INFO Connected all rings |
|
gpua038:474861:474946 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua038:474861:474946 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/AWS Libfabric/1 |
|
gpua038:474861:474946 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua038:474861:474946 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/AWS Libfabric/1 |
|
gpua038:474861:474946 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua038:474861:474946 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/AWS Libfabric/1 |
|
gpua038:474861:474946 [0] NCCL INFO Connected all trees |
|
gpua038:474861:474946 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua016:879604:879690 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read |
|
gpua016:879604:879690 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read |
|
gpua016:879604:879690 [0] NCCL INFO Connected all rings |
|
gpua016:879604:879690 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua016:879604:879690 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua016:879604:879690 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/AWS Libfabric/1 |
|
gpua016:879604:879690 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua016:879604:879690 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/AWS Libfabric/1 |
|
gpua016:879604:879690 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/AWS Libfabric/1 |
|
gpua016:879604:879690 [0] NCCL INFO Connected all trees |
|
gpua016:879604:879690 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua089:1166996:1166996 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua089:1166996:1166996 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.89<0> |
|
gpua089:1166996:1166996 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua089:1166996:1166996 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua089:1166996:1167307 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua089:1166996:1167307 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua089:1166996:1167307 [1] NCCL INFO Using network AWS Libfabric |
|
gpua089:1166996:1167307 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua089:1166996:1167307 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua089:1166996:1167307 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 |
|
gpua089:1166996:1167307 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read |
|
gpua089:1166996:1167307 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read |
|
gpua006:976276:976367 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read |
|
gpua006:976276:976367 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read |
|
gpua006:976276:976367 [1] NCCL INFO Connected all trees |
|
gpua006:976276:976367 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua006:976276:976367 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua006:976276:976367 [1] NCCL INFO comm 0x55a07acd23c0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua085:120733:120733 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua085:120733:120733 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.85<0> |
|
gpua085:120733:120733 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua085:120733:120733 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua085:120733:120821 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua085:120733:120821 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua085:120733:120821 [1] NCCL INFO Using network AWS Libfabric |
|
gpua085:120733:120821 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua085:120733:120821 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua085:120733:120821 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 |
|
gpua085:120733:120821 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read |
|
gpua085:120733:120821 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read |
|
gpua085:120733:120821 [1] NCCL INFO Connected all rings |
|
gpua057:4182115:4182221 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read |
|
gpua057:4182115:4182221 [3] NCCL INFO Connected all trees |
|
gpua057:4182115:4182221 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua057:4182115:4182221 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua057:4182115:4182221 [3] NCCL INFO comm 0x559231f91830 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua040:4155649:4155738 [3] NCCL INFO Connected all rings |
|
gpua040:4155649:4155738 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read |
|
gpua040:4155649:4155738 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read |
|
gpua040:4155649:4155738 [3] NCCL INFO Connected all trees |
|
gpua040:4155649:4155738 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua040:4155649:4155738 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua040:4155649:4155738 [3] NCCL INFO comm 0x5589e3260e30 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua040:4155646:4155646 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua040:4155646:4155646 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0> |
|
gpua040:4155646:4155646 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua040:4155646:4155646 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua040:4155646:4155741 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua080:3566289:3566380 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua080:3566289:3566380 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC/read |
|
gpua080:3566289:3566380 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC/read |
|
gpua080:3566289:3566380 [0] NCCL INFO Connected all rings |
|
gpua080:3566289:3566380 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua080:3566289:3566380 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/AWS Libfabric/1 |
|
gpua080:3566289:3566380 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua080:3566289:3566380 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/AWS Libfabric/1 |
|
gpua080:3566289:3566380 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua080:3566289:3566380 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/AWS Libfabric/1 |
|
gpua054:719590:719590 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua054:719590:719590 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0> |
|
gpua054:719590:719590 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua054:719590:719590 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua054:719590:719715 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua054:719590:719715 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua054:719590:719715 [3] NCCL INFO Using network AWS Libfabric |
|
gpua054:719590:719715 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua054:719590:719715 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua054:719590:719715 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 |
|
gpua054:719590:719715 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/AWS Libfabric/1 |
|
gpua054:719590:719715 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/AWS Libfabric/1 |
|
gpua033:1942189:1942313 [2] NCCL INFO Connected all rings |
|
gpua033:1942189:1942313 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read |
|
gpua033:1942189:1942313 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read |
|
gpua033:1942189:1942313 [2] NCCL INFO Connected all trees |
|
gpua033:1942189:1942313 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua033:1942189:1942313 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua033:1942189:1942313 [2] NCCL INFO comm 0x56003d4913e0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua007:1877472:1877611 [2] NCCL INFO Connected all rings |
|
gpua007:1877472:1877611 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read |
|
gpua007:1877472:1877611 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read |
|
gpua007:1877472:1877611 [2] NCCL INFO Connected all trees |
|
gpua007:1877472:1877611 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua007:1877472:1877611 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua007:1877472:1877611 [2] NCCL INFO comm 0x56137d36aaa0 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua012:3029512:3029601 [3] NCCL INFO Connected all rings |
|
gpua012:3029512:3029601 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read |
|
gpua012:3029512:3029601 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read |
|
gpua012:3029512:3029601 [3] NCCL INFO Connected all trees |
|
gpua012:3029512:3029601 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua012:3029512:3029601 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua012:3029512:3029601 [3] NCCL INFO comm 0x55972f7bc700 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua055:421542:421542 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua055:421542:421542 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> |
|
gpua055:421542:421542 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua055:421542:421542 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua055:421542:436566 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua055:421542:436566 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua055:421542:436566 [0] NCCL INFO Using network AWS Libfabric |
|
gpua055:421542:436566 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua055:421542:436566 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua055:421542:436566 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 |
|
gpua055:421542:436566 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua055:421542:436566 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua039:3958107:3958196 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua039:3958107:3958196 [0] NCCL INFO Using network AWS Libfabric |
|
gpua039:3958107:3958196 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua039:3958107:3958196 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua039:3958107:3958196 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 |
|
gpua039:3958107:3958196 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua039:3958107:3958196 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua039:3958107:3958196 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read |
|
gpua039:3958107:3958196 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read |
|
gpua039:3958107:3958196 [0] NCCL INFO Connected all rings |
|
gpua039:3958107:3958196 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua049:151699:151827 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read |
|
gpua049:151699:151827 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read |
|
gpua049:151699:151827 [2] NCCL INFO Connected all trees |
|
gpua049:151699:151827 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua049:151699:151827 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua049:151699:151827 [2] NCCL INFO comm 0x55e2acb47da0 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua079:4011107:4011243 [1] NCCL INFO Connected all rings |
|
gpua079:4011107:4011243 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua079:4011107:4011243 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/AWS Libfabric/1 |
|
gpua079:4011107:4011243 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC/read |
|
gpua079:4011107:4011243 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC/read |
|
gpua079:4011107:4011243 [1] NCCL INFO Connected all trees |
|
gpua079:4011107:4011243 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua079:4011107:4011243 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua079:4011107:4011243 [1] NCCL INFO comm 0x56239f56e7e0 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua038:474861:474946 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua038:474861:474946 [0] NCCL INFO comm 0x5579ce541530 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua016:879604:879690 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua016:879604:879690 [0] NCCL INFO comm 0x55ce3dfe6580 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua089:1166996:1167307 [1] NCCL INFO Connected all rings |
|
gpua089:1166996:1167307 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read |
|
gpua089:1166996:1167307 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read |
|
gpua089:1166996:1167307 [1] NCCL INFO Connected all trees |
|
gpua089:1166996:1167307 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua089:1166996:1167307 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua089:1166996:1167307 [1] NCCL INFO comm 0x563defc70860 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua006:976277:976277 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua006:976277:976277 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.6<0> |
|
gpua006:976277:976277 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua006:976277:976277 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua006:976277:976365 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua006:976277:976365 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua006:976277:976365 [2] NCCL INFO Using network AWS Libfabric |
|
gpua006:976277:976365 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua006:976277:976365 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua006:976277:976365 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 |
|
gpua006:976277:976365 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read |
|
gpua006:976277:976365 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read |
|
gpua006:976277:976365 [2] NCCL INFO Connected all rings |
|
gpua085:120733:120821 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua085:120733:120821 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/AWS Libfabric/1 |
|
gpua085:120733:120821 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read |
|
gpua085:120733:120821 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read |
|
gpua085:120733:120821 [1] NCCL INFO Connected all trees |
|
gpua085:120733:120821 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua085:120733:120821 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua085:120733:120821 [1] NCCL INFO comm 0x55ddfd2e43a0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua057:4182112:4182112 [0] NCCL INFO cudaDriverVersion 12020 |
|
gpua057:4182112:4182112 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0> |
|
gpua057:4182112:4182112 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua057:4182112:4182112 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua057:4182112:4182223 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua057:4182112:4182223 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua057:4182112:4182223 [0] NCCL INFO Using network AWS Libfabric |
|
gpua057:4182112:4182223 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua057:4182112:4182223 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua057:4182112:4182223 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 |
|
gpua057:4182112:4182223 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua040:4155646:4155741 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua040:4155646:4155741 [0] NCCL INFO Using network AWS Libfabric |
|
gpua040:4155646:4155741 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 |
|
gpua040:4155646:4155741 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua040:4155646:4155741 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 |
|
gpua040:4155646:4155741 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua040:4155646:4155741 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua040:4155646:4155741 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read |
|
gpua040:4155646:4155741 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read |
|
gpua040:4155646:4155741 [0] NCCL INFO Connected all rings |
|
gpua040:4155646:4155741 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua080:3566289:3566380 [0] NCCL INFO Connected all trees |
|
gpua080:3566289:3566380 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua080:3566289:3566380 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua080:3566289:3566380 [0] NCCL INFO comm 0x561722ae1a70 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua054:719590:719715 [3] NCCL INFO Connected all rings |
|
gpua054:719590:719715 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read |
|
gpua054:719590:719715 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read |
|
gpua054:719590:719715 [3] NCCL INFO Connected all trees |
|
gpua054:719590:719715 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua054:719590:719715 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua054:719590:719715 [3] NCCL INFO comm 0x558eeffda3c0 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua054:719589:719589 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua054:719589:719589 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0> |
|
gpua054:719589:719589 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua054:719589:719589 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua054:719589:719717 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua033:1942188:1942188 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua033:1942188:1942188 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0> |
|
gpua033:1942188:1942188 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua033:1942188:1942188 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua033:1942188:1942310 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua033:1942188:1942310 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua033:1942188:1942310 [1] NCCL INFO Using network AWS Libfabric |
|
gpua033:1942188:1942310 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua033:1942188:1942310 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua033:1942188:1942310 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 |
|
gpua033:1942188:1942310 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read |
|
gpua033:1942188:1942310 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read |
|
gpua007:1877473:1877473 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua007:1877473:1877473 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.7<0> |
|
gpua007:1877473:1877473 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua007:1877473:1877473 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua007:1877473:1877612 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua007:1877473:1877612 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua007:1877473:1877612 [3] NCCL INFO Using network AWS Libfabric |
|
gpua007:1877473:1877612 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua007:1877473:1877612 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua007:1877473:1877612 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 |
|
gpua007:1877473:1877612 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/AWS Libfabric/1 |
|
gpua007:1877473:1877612 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/AWS Libfabric/1 |
|
gpua012:3029510:3029510 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua012:3029510:3029510 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.12<0> |
|
gpua012:3029510:3029510 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua012:3029510:3029510 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua012:3029510:3029604 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua012:3029510:3029604 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua012:3029510:3029604 [1] NCCL INFO Using network AWS Libfabric |
|
gpua012:3029510:3029604 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua012:3029510:3029604 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua012:3029510:3029604 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 |
|
gpua012:3029510:3029604 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read |
|
gpua012:3029510:3029604 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC/read |
|
gpua055:421542:436566 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC/read |
|
gpua055:421542:436566 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC/read |
|
gpua055:421542:436566 [0] NCCL INFO Connected all rings |
|
gpua055:421542:436566 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua055:421542:436566 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/AWS Libfabric/1 |
|
gpua055:421542:436566 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/AWS Libfabric/1 |
|
gpua055:421542:436566 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua055:421542:436566 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua055:421542:436566 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/AWS Libfabric/1 |
|
gpua055:421542:436566 [0] NCCL INFO Connected all trees |
|
gpua055:421542:436566 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua039:3958107:3958196 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/AWS Libfabric/1 |
|
gpua039:3958107:3958196 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua039:3958107:3958196 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/AWS Libfabric/1 |
|
gpua039:3958107:3958196 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua039:3958107:3958196 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/AWS Libfabric/1 |
|
gpua039:3958107:3958196 [0] NCCL INFO Connected all trees |
|
gpua039:3958107:3958196 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua039:3958107:3958196 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua039:3958107:3958196 [0] NCCL INFO comm 0x564bce104cf0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua079:4011108:4011108 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua079:4011108:4011108 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.79<0> |
|
gpua079:4011108:4011108 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua079:4011108:4011108 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua079:4011108:4011245 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua079:4011108:4011245 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua079:4011108:4011245 [2] NCCL INFO Using network AWS Libfabric |
|
gpua079:4011108:4011245 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua079:4011108:4011245 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua079:4011108:4011245 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 |
|
gpua079:4011108:4011245 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC/read |
|
gpua079:4011108:4011245 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC/read |
|
gpua038:474864:474864 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua038:474864:474864 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.38<0> |
|
gpua038:474864:474864 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua038:474864:474864 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua038:474864:474945 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua038:474864:474945 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua038:474864:474945 [3] NCCL INFO Using network AWS Libfabric |
|
gpua038:474864:474945 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua038:474864:474945 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua038:474864:474945 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 |
|
gpua038:474864:474945 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/AWS Libfabric/1 |
|
gpua038:474864:474945 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/AWS Libfabric/1 |
|
gpua016:879606:879606 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua016:879606:879606 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> |
|
gpua016:879606:879606 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua016:879606:879606 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua016:879606:879691 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua016:879606:879691 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua016:879606:879691 [2] NCCL INFO Using network AWS Libfabric |
|
gpua016:879606:879691 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua016:879606:879691 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua016:879606:879691 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 |
|
gpua016:879606:879691 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read |
|
gpua016:879606:879691 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read |
|
gpua016:879606:879691 [2] NCCL INFO Connected all rings |
|
gpua089:1166998:1166998 [3] NCCL INFO cudaDriverVersion 12020 |
|
gpua089:1166998:1166998 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.89<0> |
|
gpua089:1166998:1166998 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua089:1166998:1166998 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua089:1166998:1167309 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua089:1166998:1167309 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua089:1166998:1167309 [3] NCCL INFO Using network AWS Libfabric |
|
gpua089:1166998:1167309 [3] NCCL INFO Setting affinity for GPU 3 to ffff |
|
gpua089:1166998:1167309 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua089:1166998:1167309 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 |
|
gpua089:1166998:1167309 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/AWS Libfabric/1 |
|
gpua089:1166998:1167309 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/AWS Libfabric/1 |
|
gpua006:976277:976365 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read |
|
gpua006:976277:976365 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read |
|
gpua006:976277:976365 [2] NCCL INFO Connected all trees |
|
gpua006:976277:976365 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua006:976277:976365 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua006:976277:976365 [2] NCCL INFO comm 0x55f7aa255960 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua085:120734:120734 [2] NCCL INFO cudaDriverVersion 12020 |
|
gpua085:120734:120734 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.85<0> |
|
gpua085:120734:120734 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua085:120734:120734 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua085:120734:120823 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua085:120734:120823 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua085:120734:120823 [2] NCCL INFO Using network AWS Libfabric |
|
gpua085:120734:120823 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua085:120734:120823 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua085:120734:120823 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 |
|
gpua085:120734:120823 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read |
|
gpua085:120734:120823 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read |
|
gpua085:120734:120823 [2] NCCL INFO Connected all rings |
|
gpua057:4182112:4182223 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua057:4182112:4182223 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read |
|
gpua057:4182112:4182223 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read |
|
gpua057:4182112:4182223 [0] NCCL INFO Connected all rings |
|
gpua057:4182112:4182223 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua057:4182112:4182223 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua057:4182112:4182223 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua057:4182112:4182223 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/AWS Libfabric/1 |
|
gpua057:4182112:4182223 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/AWS Libfabric/1 |
|
gpua057:4182112:4182223 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/AWS Libfabric/1 |
|
gpua040:4155646:4155741 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua040:4155646:4155741 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/AWS Libfabric/1 |
|
gpua040:4155646:4155741 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/AWS Libfabric/1 |
|
gpua040:4155646:4155741 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/AWS Libfabric/1 |
|
gpua040:4155646:4155741 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/AWS Libfabric/1 |
|
gpua040:4155646:4155741 [0] NCCL INFO Connected all trees |
|
gpua040:4155646:4155741 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua040:4155646:4155741 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua040:4155646:4155741 [0] NCCL INFO comm 0x5637999380e0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua080:3566290:3566290 [1] NCCL INFO cudaDriverVersion 12020 |
|
gpua080:3566290:3566290 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.80<0> |
|
gpua080:3566290:3566290 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol. |
|
gpua080:3566290:3566290 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5). |
|
gpua080:3566290:3566383 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0 |
|
gpua080:3566290:3566383 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua080:3566290:3566383 [1] NCCL INFO Using network AWS Libfabric |
|
gpua080:3566290:3566383 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 |
|
gpua080:3566290:3566383 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua080:3566290:3566383 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 |
|
gpua080:3566290:3566383 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read |
|
gpua080:3566290:3566383 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read |
|
gpua054:719589:719717 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics) |
|
gpua054:719589:719717 [2] NCCL INFO Using network AWS Libfabric |
|
gpua054:719589:719717 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 |
|
gpua054:719589:719717 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1. |
|
gpua054:719589:719717 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 |
|
gpua054:719589:719717 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read |
|
gpua054:719589:719717 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read |
|
gpua054:719589:719717 [2] NCCL INFO Connected all rings |
|
gpua054:719589:719717 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read |
|
gpua054:719589:719717 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read |
|
gpua054:719589:719717 [2] NCCL INFO Connected all trees |
|
gpua054:719589:719717 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua033:1942188:1942310 [1] NCCL INFO Connected all rings |
|
gpua033:1942188:1942310 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua033:1942188:1942310 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/AWS Libfabric/1 |
|
gpua033:1942188:1942310 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read |
|
gpua033:1942188:1942310 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read |
|
gpua033:1942188:1942310 [1] NCCL INFO Connected all trees |
|
gpua033:1942188:1942310 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua033:1942188:1942310 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua033:1942188:1942310 [1] NCCL INFO comm 0x561c1f1a10d0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua007:1877473:1877612 [3] NCCL INFO Connected all rings |
|
gpua007:1877473:1877612 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read |
|
gpua007:1877473:1877612 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read |
|
gpua007:1877473:1877612 [3] NCCL INFO Connected all trees |
|
gpua007:1877473:1877612 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua007:1877473:1877612 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua007:1877473:1877612 [3] NCCL INFO comm 0x5574419027e0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua012:3029510:3029604 [1] NCCL INFO Connected all rings |
|
gpua012:3029510:3029604 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua012:3029510:3029604 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/AWS Libfabric/1 |
|
gpua012:3029510:3029604 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read |
|
gpua012:3029510:3029604 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read |
|
gpua012:3029510:3029604 [1] NCCL INFO Connected all trees |
|
gpua012:3029510:3029604 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua012:3029510:3029604 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua012:3029510:3029604 [1] NCCL INFO comm 0x5603785a78d0 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua055:421542:436566 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua055:421542:436566 [0] NCCL INFO comm 0x557dc7538880 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua079:4011108:4011245 [2] NCCL INFO Connected all rings |
|
gpua079:4011108:4011245 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC/read |
|
gpua079:4011108:4011245 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC/read |
|
gpua079:4011108:4011245 [2] NCCL INFO Connected all trees |
|
gpua079:4011108:4011245 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua079:4011108:4011245 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua079:4011108:4011245 [2] NCCL INFO comm 0x556234387e40 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua038:474864:474945 [3] NCCL INFO Connected all rings |
|
gpua038:474864:474945 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC/read |
|
gpua038:474864:474945 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC/read |
|
gpua038:474864:474945 [3] NCCL INFO Connected all trees |
|
gpua038:474864:474945 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua038:474864:474945 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua038:474864:474945 [3] NCCL INFO comm 0x55e60dc3daa0 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua016:879606:879691 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read |
|
gpua016:879606:879691 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read |
|
gpua016:879606:879691 [2] NCCL INFO Connected all trees |
|
gpua016:879606:879691 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua016:879606:879691 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua016:879606:879691 [2] NCCL INFO comm 0x5648224d1260 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua089:1166998:1167309 [3] NCCL INFO Connected all rings |
|
gpua089:1166998:1167309 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read |
|
gpua089:1166998:1167309 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read |
|
gpua089:1166998:1167309 [3] NCCL INFO Connected all trees |
|
gpua089:1166998:1167309 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua089:1166998:1167309 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua089:1166998:1167309 [3] NCCL INFO comm 0x556df7b172a0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE |
|
gpua085:120734:120823 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read |
|
gpua085:120734:120823 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read |
|
gpua085:120734:120823 [2] NCCL INFO Connected all trees |
|
gpua085:120734:120823 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua085:120734:120823 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua085:120734:120823 [2] NCCL INFO comm 0x5561443c27b0 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
gpua057:4182112:4182223 [0] NCCL INFO Connected all trees |
|
gpua057:4182112:4182223 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua057:4182112:4182223 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua057:4182112:4182223 [0] NCCL INFO comm 0x560a56bbc810 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE |
|
gpua080:3566290:3566383 [1] NCCL INFO Connected all rings |
|
gpua080:3566290:3566383 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/AWS Libfabric/1 |
|
gpua080:3566290:3566383 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/AWS Libfabric/1 |
|
gpua080:3566290:3566383 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read |
|
gpua080:3566290:3566383 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read |
|
gpua080:3566290:3566383 [1] NCCL INFO Connected all trees |
|
gpua080:3566290:3566383 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 |
|
gpua080:3566290:3566383 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua080:3566290:3566383 [1] NCCL INFO comm 0x55d491c078d0 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE |
|
gpua054:719589:719717 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer |
|
gpua054:719589:719717 [2] NCCL INFO comm 0x564abc81db40 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE |
|
[gpua006:0/64] 2024-02-15 12:29:34,261 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. |
|
[gpua006:0/64] 2024-02-15 12:31:51,645 (trainer:756) INFO: 44epoch:train:1-100batch: iter_time=5.322, forward_time=0.402, loss_ctc=77.600, loss_interctc_layer6=82.729, loss_interctc_layer12=68.542, loss_interctc_layer15=62.876, loss_interctc_layer21=80.765, loss=74.502, backward_time=0.253, grad_norm=72.290, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.143, optim0_lr0=6.100e-05, train_time=17.127 |
|
[gpua006:0/64] 2024-02-15 12:34:36,577 (trainer:756) INFO: 44epoch:train:101-200batch: iter_time=9.603e-05, forward_time=0.140, loss_ctc=56.273, loss_interctc_layer6=67.344, loss_interctc_layer12=55.524, loss_interctc_layer15=50.836, loss_interctc_layer21=58.342, loss=57.664, backward_time=0.208, grad_norm=61.756, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=6.100e-05, train_time=1.650 |
|
[gpua006:0/64] 2024-02-15 12:40:41,539 (trainer:756) INFO: 44epoch:train:201-300batch: iter_time=9.204e-05, forward_time=0.140, loss_ctc=63.383, loss_interctc_layer6=70.134, loss_interctc_layer12=57.930, loss_interctc_layer15=53.043, loss_interctc_layer21=65.653, loss=62.029, backward_time=0.204, grad_norm=70.198, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=6.100e-05, train_time=3.649 |
|
[gpua006:0/64] 2024-02-15 12:44:41,099 (trainer:756) INFO: 44epoch:train:301-400batch: iter_time=9.106e-05, forward_time=0.140, loss_ctc=83.749, loss_interctc_layer6=87.785, loss_interctc_layer12=72.950, loss_interctc_layer15=66.785, loss_interctc_layer21=86.953, loss=79.644, backward_time=0.206, grad_norm=96.768, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=6.099e-05, train_time=2.395 |
|
[gpua006:0/64] 2024-02-15 12:50:14,888 (trainer:756) INFO: 44epoch:train:401-500batch: iter_time=9.719e-05, forward_time=0.322, loss_ctc=71.915, loss_interctc_layer6=74.754, loss_interctc_layer12=61.723, loss_interctc_layer15=56.460, loss_interctc_layer21=74.798, loss=67.930, backward_time=0.398, grad_norm=83.218, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.151, optim0_lr0=6.099e-05, train_time=3.336 |
|
[gpua006:0/64] 2024-02-15 12:53:39,953 (trainer:756) INFO: 44epoch:train:501-600batch: iter_time=9.616e-05, forward_time=0.141, loss_ctc=75.507, loss_interctc_layer6=81.811, loss_interctc_layer12=68.392, loss_interctc_layer15=62.973, loss_interctc_layer21=78.434, loss=73.423, backward_time=0.205, grad_norm=119.487, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=6.098e-05, train_time=2.052 |
|
[gpua006:0/64] 2024-02-15 12:59:09,174 (trainer:756) INFO: 44epoch:train:601-700batch: iter_time=9.474e-05, forward_time=0.143, loss_ctc=84.165, loss_interctc_layer6=94.822, loss_interctc_layer12=78.774, loss_interctc_layer15=72.420, loss_interctc_layer21=87.155, loss=83.467, backward_time=0.205, grad_norm=90.875, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=6.098e-05, train_time=3.292 |
|
[gpua006:0/64] 2024-02-15 13:03:10,205 (trainer:756) INFO: 44epoch:train:701-800batch: iter_time=9.946e-05, forward_time=0.140, loss_ctc=72.099, loss_interctc_layer6=88.720, loss_interctc_layer12=73.968, loss_interctc_layer15=68.460, loss_interctc_layer21=74.662, loss=75.582, backward_time=0.206, grad_norm=102.393, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=6.097e-05, train_time=2.410 |
|
[gpua006:0/64] 2024-02-15 13:05:23,041 (trainer:756) INFO: 44epoch:train:801-900batch: iter_time=9.870e-05, forward_time=0.140, loss_ctc=61.419, loss_interctc_layer6=74.833, loss_interctc_layer12=61.863, loss_interctc_layer15=56.666, loss_interctc_layer21=63.558, loss=63.668, backward_time=0.206, grad_norm=111.320, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=6.097e-05, train_time=1.328 |
|
[gpua006:0/64] 2024-02-15 13:10:26,895 (trainer:756) INFO: 44epoch:train:901-1000batch: iter_time=2.834e-04, forward_time=0.334, loss_ctc=82.029, loss_interctc_layer6=87.769, loss_interctc_layer12=73.159, loss_interctc_layer15=67.256, loss_interctc_layer21=85.068, loss=79.056, backward_time=0.287, grad_norm=80.625, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.149, optim0_lr0=6.096e-05, train_time=3.037 |
|
[gpua006:0/64] 2024-02-15 13:14:12,320 (trainer:756) INFO: 44epoch:train:1001-1100batch: iter_time=9.039e-05, forward_time=0.191, loss_ctc=79.715, loss_interctc_layer6=90.556, loss_interctc_layer12=76.023, loss_interctc_layer15=70.476, loss_interctc_layer21=82.605, loss=79.875, backward_time=0.253, grad_norm=95.696, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=6.096e-05, train_time=2.252 |
|
[gpua006:0/64] 2024-02-15 13:18:14,174 (trainer:756) INFO: 44epoch:train:1101-1200batch: iter_time=9.335e-05, forward_time=0.142, loss_ctc=71.375, loss_interctc_layer6=78.806, loss_interctc_layer12=65.631, loss_interctc_layer15=60.292, loss_interctc_layer21=74.099, loss=70.040, backward_time=0.204, grad_norm=104.875, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=6.095e-05, train_time=2.420 |
|
[gpua006:0/64] 2024-02-15 13:20:01,919 (multiple_iter_factory:32) INFO: Building 1th iter-factory... |
|
[gpua006:0/64] 2024-02-15 13:20:21,077 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') |
|
[gpua006:0/64] 2024-02-15 13:20:24,722 (abs_task:1660) INFO: [train] dataset: |
|
ESPnetDataset( |
|
speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} |
|
text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} |
|
text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} |
|
text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} |
|
preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7f9f339910c0>) |
|
[gpua006:0/64] 2024-02-15 13:20:24,723 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, |
|
[gpua006:0/64] 2024-02-15 13:20:25,005 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 |
|
[gpua006:0/64] 2024-02-15 13:40:10,724 (trainer:756) INFO: 44epoch:train:1201-1300batch: iter_time=5.557, forward_time=0.141, loss_ctc=74.267, loss_interctc_layer6=85.036, loss_interctc_layer12=70.278, loss_interctc_layer15=64.366, loss_interctc_layer21=77.052, loss=74.200, backward_time=0.206, grad_norm=176.936, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=6.095e-05, train_time=13.166 |
|
[gpua006:0/64] 2024-02-15 13:42:52,805 (trainer:756) INFO: 44epoch:train:1301-1400batch: iter_time=8.760e-05, forward_time=0.142, loss_ctc=65.990, loss_interctc_layer6=72.936, loss_interctc_layer12=60.245, loss_interctc_layer15=55.178, loss_interctc_layer21=68.407, loss=64.551, backward_time=0.209, grad_norm=128.369, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=6.094e-05, train_time=1.621 |
|
[gpua006:0/64] 2024-02-15 13:44:37,286 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model. |
|
[gpua006:0/64] 2024-02-15 13:45:38,467 (trainer:756) INFO: 44epoch:train:1401-1500batch: iter_time=8.678e-05, forward_time=0.306, loss_ctc=62.190, loss_interctc_layer6=70.823, loss_interctc_layer12=58.543, loss_interctc_layer15=53.639, loss_interctc_layer21=64.338, loss=61.907, backward_time=0.299, grad_norm=74.908, clip=100.000, loss_scale=1.711e+31, optim_step_time=0.156, optim0_lr0=6.094e-05, train_time=1.653 |
|
[gpua006:0/64] 2024-02-15 13:49:46,045 (trainer:756) INFO: 44epoch:train:1501-1600batch: iter_time=8.717e-05, forward_time=0.170, loss_ctc=58.098, loss_interctc_layer6=69.488, loss_interctc_layer12=57.350, loss_interctc_layer15=52.513, loss_interctc_layer21=60.165, loss=59.523, backward_time=0.250, grad_norm=66.391, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=6.093e-05, train_time=2.478 |
|
[gpua006:0/64] 2024-02-15 13:52:37,466 (trainer:756) INFO: 44epoch:train:1601-1700batch: iter_time=8.983e-05, forward_time=0.143, loss_ctc=97.248, loss_interctc_layer6=92.237, loss_interctc_layer12=76.065, loss_interctc_layer15=69.537, loss_interctc_layer21=100.975, loss=87.212, backward_time=0.206, grad_norm=107.402, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=6.093e-05, train_time=1.713 |
|
[gpua006:0/64] 2024-02-15 13:57:35,159 (trainer:756) INFO: 44epoch:train:1701-1800batch: iter_time=9.110e-05, forward_time=0.144, loss_ctc=64.583, loss_interctc_layer6=71.856, loss_interctc_layer12=59.074, loss_interctc_layer15=53.929, loss_interctc_layer21=67.005, loss=63.290, backward_time=0.206, grad_norm=70.962, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=6.092e-05, train_time=2.978 |
|
[gpua006:0/64] 2024-02-15 14:02:07,132 (trainer:756) INFO: 44epoch:train:1801-1900batch: iter_time=8.727e-05, forward_time=0.142, loss_ctc=77.353, loss_interctc_layer6=87.252, loss_interctc_layer12=72.766, loss_interctc_layer15=66.952, loss_interctc_layer21=80.343, loss=76.933, backward_time=0.206, grad_norm=75.575, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=6.092e-05, train_time=2.720 |
|
[gpua006:0/64] 2024-02-15 14:05:22,774 (trainer:756) INFO: 44epoch:train:1901-2000batch: iter_time=8.500e-05, forward_time=0.143, loss_ctc=80.574, loss_interctc_layer6=96.639, loss_interctc_layer12=80.574, loss_interctc_layer15=74.359, loss_interctc_layer21=83.503, loss=83.130, backward_time=0.208, grad_norm=95.884, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.137, optim0_lr0=6.092e-05, train_time=1.956 |
|
[gpua006:0/64] 2024-02-15 14:07:37,193 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model. |
|
[gpua006:0/64] 2024-02-15 14:07:49,031 (trainer:756) INFO: 44epoch:train:2001-2100batch: iter_time=8.733e-05, forward_time=0.180, loss_ctc=61.372, loss_interctc_layer6=74.493, loss_interctc_layer12=61.318, loss_interctc_layer15=56.144, loss_interctc_layer21=63.554, loss=63.376, backward_time=0.239, grad_norm=82.381, clip=100.000, loss_scale=9.834e+30, optim_step_time=0.138, optim0_lr0=6.091e-05, train_time=1.462 |
|
[gpua006:0/64] 2024-02-15 14:12:03,032 (trainer:756) INFO: 44epoch:train:2101-2200batch: iter_time=9.052e-05, forward_time=0.333, loss_ctc=72.763, loss_interctc_layer6=83.301, loss_interctc_layer12=69.266, loss_interctc_layer15=63.766, loss_interctc_layer21=75.447, loss=72.909, backward_time=0.329, grad_norm=87.425, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.156, optim0_lr0=6.091e-05, train_time=2.539 |
|
[gpua006:0/64] 2024-02-15 14:15:22,207 (trainer:756) INFO: 44epoch:train:2201-2300batch: iter_time=9.271e-05, forward_time=0.143, loss_ctc=79.764, loss_interctc_layer6=84.073, loss_interctc_layer12=69.909, loss_interctc_layer15=63.902, loss_interctc_layer21=82.684, loss=76.066, backward_time=0.206, grad_norm=86.251, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=6.090e-05, train_time=1.993 |
|
[gpua006:0/64] 2024-02-15 14:19:30,723 (trainer:756) INFO: 44epoch:train:2301-2400batch: iter_time=9.251e-05, forward_time=0.143, loss_ctc=74.045, loss_interctc_layer6=83.786, loss_interctc_layer12=70.018, loss_interctc_layer15=64.490, loss_interctc_layer21=76.757, loss=73.819, backward_time=0.206, grad_norm=80.263, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=6.090e-05, train_time=2.483 |
|
[gpua006:0/64] 2024-02-15 14:22:32,200 (trainer:756) INFO: 44epoch:train:2401-2500batch: iter_time=9.198e-05, forward_time=0.144, loss_ctc=71.036, loss_interctc_layer6=83.202, loss_interctc_layer12=69.054, loss_interctc_layer15=63.397, loss_interctc_layer21=73.627, loss=72.063, backward_time=0.207, grad_norm=80.207, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=6.089e-05, train_time=1.817 |
|
[gpua006:0/64] 2024-02-15 14:22:52,229 (multiple_iter_factory:32) INFO: Building 2th iter-factory... |
|
[gpua006:0/64] 2024-02-15 14:23:11,439 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') |
|
[gpua006:0/64] 2024-02-15 14:23:14,942 (abs_task:1660) INFO: [train] dataset: |
|
ESPnetDataset( |
|
speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} |
|
text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} |
|
text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} |
|
text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} |
|
preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7f9bd791f8b0>) |
|
[gpua006:0/64] 2024-02-15 14:23:14,942 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, |
|
[gpua006:0/64] 2024-02-15 14:23:14,949 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 |
|
srun: Job step aborted: Waiting up to 32 seconds for job step to finish. |
|
|