Yerbolat Khassanov
First model version
39c21b4
# python3 -m espnet2.bin.asr_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram2000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram2000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,sound --valid_shape_file exp/asr_stats_raw_bpe2000_sp/valid/speech_shape --resume true --ignore_init_mismatch false --fold_length 80000 --output_dir exp/asr_train_raw_bpe2000_sp --config conf/train.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/asr_stats_raw_bpe2000_sp/train/feats_stats.npz --train_data_path_and_name_and_type dump/raw/train_sp/wav.scp,speech,sound --train_shape_file exp/asr_stats_raw_bpe2000_sp/train/speech_shape --fold_length 150 --train_data_path_and_name_and_type dump/raw/train_sp/text,text,text --train_shape_file exp/asr_stats_raw_bpe2000_sp/train/text_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/asr_stats_raw_bpe2000_sp/valid/text_shape.bpe --ngpu 1 --multiprocessing_distributed True
# Started at Fri May 12 16:50:15 CST 2023
#
/mnt/bd/khassan-volume3/tools/espent_KSC_recipe_test/tools/miniconda/envs/espnet/bin/python3 /mnt/bd/khassan-volume3/tools/espent_KSC_recipe_test/espnet2/bin/asr_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram2000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram2000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,sound --valid_shape_file exp/asr_stats_raw_bpe2000_sp/valid/speech_shape --resume true --ignore_init_mismatch false --fold_length 80000 --output_dir exp/asr_train_raw_bpe2000_sp --config conf/train.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/asr_stats_raw_bpe2000_sp/train/feats_stats.npz --train_data_path_and_name_and_type dump/raw/train_sp/wav.scp,speech,sound --train_shape_file exp/asr_stats_raw_bpe2000_sp/train/speech_shape --fold_length 150 --train_data_path_and_name_and_type dump/raw/train_sp/text,text,text --train_shape_file exp/asr_stats_raw_bpe2000_sp/train/text_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/asr_stats_raw_bpe2000_sp/valid/text_shape.bpe --ngpu 1 --multiprocessing_distributed True
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:22,540 (asr:500) INFO: Vocabulary size: 2000
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,222 (initialize:88) INFO: Initialize encoder.embed.conv.0.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.embed.conv.2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.embed.out.0.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.0.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.0.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.0.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.0.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.0.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.0.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.0.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.0.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.1.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.1.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.1.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.1.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.1.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.1.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.1.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.1.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.2.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.2.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.2.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.2.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.2.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.2.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.2.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.2.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.3.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,223 (initialize:88) INFO: Initialize encoder.encoders.3.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.3.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.3.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.3.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.3.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.3.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.3.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.4.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.4.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.4.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.4.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.4.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.4.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.4.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.4.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.5.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.5.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.5.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.5.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.5.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.5.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.5.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.5.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.6.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.6.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.6.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.6.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.6.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.6.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.6.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.6.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.7.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.7.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.7.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,224 (initialize:88) INFO: Initialize encoder.encoders.7.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.7.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.7.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.7.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.7.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.8.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.8.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.8.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.8.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.8.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.8.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.8.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.8.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.9.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.9.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.9.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.9.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.9.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.9.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.9.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.9.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.10.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.10.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.10.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.10.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.10.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.10.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.10.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.10.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.11.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.11.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.11.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.11.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.11.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,225 (initialize:88) INFO: Initialize encoder.encoders.11.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize encoder.encoders.11.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize encoder.encoders.11.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize encoder.after_norm.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.after_norm.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.output_layer.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.src_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.src_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.src_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.src_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.0.norm3.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.src_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.src_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.src_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.src_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.1.norm3.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.2.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.2.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,226 (initialize:88) INFO: Initialize decoder.decoders.2.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.2.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.2.src_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.2.src_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.2.src_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.2.src_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.2.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.2.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.2.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.2.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.2.norm3.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.src_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.src_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.src_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.src_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.3.norm3.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.4.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.4.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.4.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.4.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.4.src_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.4.src_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.4.src_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.4.src_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.4.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,227 (initialize:88) INFO: Initialize decoder.decoders.4.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.4.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.4.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.4.norm3.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.self_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.self_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.self_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.self_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.src_attn.linear_q.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.src_attn.linear_k.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.src_attn.linear_v.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.src_attn.linear_out.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.feed_forward.w_1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.feed_forward.w_2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.norm1.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.norm2.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize decoder.decoders.5.norm3.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:23,228 (initialize:88) INFO: Initialize ctc.ctc_lo.bias to zeros
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:25,563 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:25,568 (abs_task:1202) INFO: Model structure:
ESPnetASRModel(
(frontend): DefaultFrontend(
(stft): Stft(n_fft=512, win_length=512, hop_length=128, center=True, normalized=False, onesided=True)
(frontend): Frontend()
(logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
)
(specaug): SpecAug(
(time_warp): TimeWarp(window=5, mode=bicubic)
(freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
(time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
)
(normalize): GlobalMVN(stats_file=exp/asr_stats_raw_bpe2000_sp/train/feats_stats.npz, norm_means=True, norm_vars=True)
(encoder): TransformerEncoder(
(embed): Conv2dSubsampling(
(conv): Sequential(
(0): Conv2d(1, 256, kernel_size=(3, 3), stride=(2, 2))
(1): ReLU()
(2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2))
(3): ReLU()
)
(out): Sequential(
(0): Linear(in_features=4864, out_features=256, bias=True)
(1): PositionalEncoding(
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
(encoders): MultiSequential(
(0): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(1): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(2): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(3): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(4): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(5): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(6): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(7): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(8): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(9): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(10): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(11): EncoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
)
(decoder): TransformerDecoder(
(embed): Sequential(
(0): Embedding(2000, 256)
(1): PositionalEncoding(
(dropout): Dropout(p=0.1, inplace=False)
)
)
(after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(output_layer): Linear(in_features=256, out_features=2000, bias=True)
(decoders): MultiSequential(
(0): DecoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(src_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(1): DecoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(src_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(2): DecoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(src_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(3): DecoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(src_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(4): DecoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(src_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(5): DecoderLayer(
(self_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(src_attn): MultiHeadedAttention(
(linear_q): Linear(in_features=256, out_features=256, bias=True)
(linear_k): Linear(in_features=256, out_features=256, bias=True)
(linear_v): Linear(in_features=256, out_features=256, bias=True)
(linear_out): Linear(in_features=256, out_features=256, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(feed_forward): PositionwiseFeedForward(
(w_1): Linear(in_features=256, out_features=2048, bias=True)
(w_2): Linear(in_features=2048, out_features=256, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(activation): ReLU()
)
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
(criterion_att): LabelSmoothingLoss(
(criterion): KLDivLoss()
)
(ctc): CTC(
(ctc_lo): Linear(in_features=256, out_features=2000, bias=True)
(ctc_loss): CTCLoss()
)
)
Model summary:
Class Name: ESPnetASRModel
Total Number of model parameters: 28.63 M
Number of trainable parameters: 28.63 M (100.0%)
Size: 114.53 MB
Type: torch.float32
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:25,568 (abs_task:1205) INFO: Optimizer:
Adam (
Parameter Group 0
amsgrad: False
betas: (0.9, 0.999)
capturable: False
differentiable: False
eps: 1e-08
foreach: None
fused: False
initial_lr: 0.0001
lr: 3.3333333333333334e-09
maximize: False
weight_decay: 0
)
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:25,568 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=30000)
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:25,569 (abs_task:1215) INFO: Saving the configuration in exp/asr_train_raw_bpe2000_sp/config.yaml
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:26,799 (asr:471) INFO: Optional Data Names: ('text_spk2', 'text_spk3', 'text_spk4')
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,310 (abs_task:1570) INFO: [train] dataset:
ESPnetDataset(
speech: {"path": "dump/raw/train_sp/wav.scp", "type": "sound"}
text: {"path": "dump/raw/train_sp/text", "type": "text"}
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f4f7fa83940>)
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,311 (abs_task:1571) INFO: [train] Batch sampler: FoldedBatchSampler(N-batch=7161, batch_size=128, shape_files=['exp/asr_stats_raw_bpe2000_sp/train/speech_shape', 'exp/asr_stats_raw_bpe2000_sp/train/text_shape.bpe'], sort_in_batch=descending, sort_batch=descending)
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,312 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=7161, mean=61.7, min=12, max=128
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,368 (asr:471) INFO: Optional Data Names: ('text_spk2', 'text_spk3', 'text_spk4')
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,381 (abs_task:1570) INFO: [valid] dataset:
ESPnetDataset(
speech: {"path": "dump/raw/dev/wav.scp", "type": "sound"}
text: {"path": "dump/raw/dev/text", "type": "text"}
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f4f911692b0>)
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,381 (abs_task:1571) INFO: [valid] Batch sampler: FoldedBatchSampler(N-batch=53, batch_size=128, shape_files=['exp/asr_stats_raw_bpe2000_sp/valid/speech_shape', 'exp/asr_stats_raw_bpe2000_sp/valid/text_shape.bpe'], sort_in_batch=descending, sort_batch=descending)
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,381 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=53, mean=61.9, min=29, max=128
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,388 (asr:471) INFO: Optional Data Names: ('text_spk2', 'text_spk3', 'text_spk4')
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,411 (abs_task:1570) INFO: [plot_att] dataset:
ESPnetDataset(
speech: {"path": "dump/raw/dev/wav.scp", "type": "sound"}
text: {"path": "dump/raw/dev/text", "type": "text"}
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f4f7fa83e20>)
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,411 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=3283, batch_size=1, key_file=exp/asr_stats_raw_bpe2000_sp/valid/speech_shape,
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,411 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:29,767 (trainer:284) INFO: 1/100epoch started
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:36,110 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:36,379 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:36,568 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:36,776 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:37,016 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:37,511 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:38,909 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:50:58,909 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:51:15,156 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:52:15,035 (trainer:732) INFO: 1epoch:train:1-358batch: iter_time=0.038, forward_time=0.110, loss_ctc=1.223e+03, loss_att=151.850, acc=5.081e-04, loss=473.052, backward_time=0.050, optim0_lr0=5.739e-07, train_time=0.294, grad_norm=1.545e+03, clip=100.000, loss_scale=238.762, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:53:50,347 (trainer:732) INFO: 1epoch:train:359-716batch: iter_time=0.026, forward_time=0.097, loss_ctc=360.904, loss_att=151.174, acc=0.003, loss=214.093, backward_time=0.048, optim0_lr0=1.765e-06, train_time=0.266, grad_norm=745.873, clip=100.000, loss_scale=128.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:55:26,792 (trainer:732) INFO: 1epoch:train:717-1074batch: iter_time=0.020, forward_time=0.103, loss_ctc=261.746, loss_att=152.868, acc=0.034, loss=185.532, backward_time=0.048, optim0_lr0=2.958e-06, train_time=0.269, grad_norm=517.623, clip=100.000, loss_scale=128.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:57:04,509 (trainer:732) INFO: 1epoch:train:1075-1432batch: iter_time=0.025, forward_time=0.103, loss_ctc=198.610, loss_att=140.436, acc=0.050, loss=157.888, backward_time=0.047, optim0_lr0=4.152e-06, train_time=0.273, grad_norm=285.924, clip=100.000, loss_scale=128.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:57:08,777 (preprocessor:336) WARNING: The length of the text output exceeds 100, which may cause OOM on the GPU.Please ensure that the data processing is correct and verify it.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 16:58:39,914 (trainer:732) INFO: 1epoch:train:1433-1790batch: iter_time=0.022, forward_time=0.101, loss_ctc=166.016, loss_att=133.949, acc=0.052, loss=143.569, backward_time=0.047, optim0_lr0=5.345e-06, train_time=0.266, grad_norm=109.777, clip=100.000, loss_scale=128.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:00:17,600 (trainer:732) INFO: 1epoch:train:1791-2148batch: iter_time=0.027, forward_time=0.102, loss_ctc=150.882, loss_att=128.920, acc=0.052, loss=135.509, backward_time=0.047, optim0_lr0=6.538e-06, train_time=0.273, grad_norm=22.196, clip=99.721, loss_scale=131.218, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:01:53,739 (trainer:732) INFO: 1epoch:train:2149-2506batch: iter_time=0.023, forward_time=0.102, loss_ctc=152.593, loss_att=131.054, acc=0.051, loss=137.516, backward_time=0.047, optim0_lr0=7.732e-06, train_time=0.268, grad_norm=17.164, clip=100.000, loss_scale=256.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:03:29,824 (trainer:732) INFO: 1epoch:train:2507-2864batch: iter_time=0.020, forward_time=0.103, loss_ctc=154.716, loss_att=132.444, acc=0.053, loss=139.126, backward_time=0.047, optim0_lr0=8.925e-06, train_time=0.268, grad_norm=17.622, clip=100.000, loss_scale=256.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:05:06,511 (trainer:732) INFO: 1epoch:train:2865-3222batch: iter_time=0.026, forward_time=0.101, loss_ctc=148.715, loss_att=126.999, acc=0.057, loss=133.514, backward_time=0.047, optim0_lr0=1.012e-05, train_time=0.270, grad_norm=15.482, clip=100.000, loss_scale=256.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:06:45,463 (trainer:732) INFO: 1epoch:train:3223-3580batch: iter_time=0.027, forward_time=0.104, loss_ctc=153.417, loss_att=130.641, acc=0.056, loss=137.474, backward_time=0.047, optim0_lr0=1.131e-05, train_time=0.276, grad_norm=15.705, clip=100.000, loss_scale=256.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:07:06,972 (preprocessor:336) WARNING: The length of the text output exceeds 100, which may cause OOM on the GPU.Please ensure that the data processing is correct and verify it.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:08:22,355 (trainer:732) INFO: 1epoch:train:3581-3938batch: iter_time=0.021, forward_time=0.104, loss_ctc=155.755, loss_att=132.144, acc=0.057, loss=139.228, backward_time=0.047, optim0_lr0=1.251e-05, train_time=0.270, grad_norm=16.703, clip=100.000, loss_scale=256.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:09:58,712 (trainer:732) INFO: 1epoch:train:3939-4296batch: iter_time=0.024, forward_time=0.101, loss_ctc=151.475, loss_att=127.958, acc=0.061, loss=135.013, backward_time=0.047, optim0_lr0=1.370e-05, train_time=0.269, grad_norm=16.412, clip=99.441, loss_scale=368.268, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:11:38,626 (trainer:732) INFO: 1epoch:train:4297-4654batch: iter_time=0.032, forward_time=0.103, loss_ctc=145.097, loss_att=122.081, acc=0.066, loss=128.986, backward_time=0.047, optim0_lr0=1.489e-05, train_time=0.279, grad_norm=15.341, clip=99.441, loss_scale=512.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:13:14,980 (trainer:732) INFO: 1epoch:train:4655-5012batch: iter_time=0.016, forward_time=0.105, loss_ctc=160.518, loss_att=134.381, acc=0.065, loss=142.222, backward_time=0.047, optim0_lr0=1.608e-05, train_time=0.269, grad_norm=18.137, clip=99.721, loss_scale=512.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:14:51,697 (trainer:732) INFO: 1epoch:train:5013-5370batch: iter_time=0.024, forward_time=0.102, loss_ctc=148.372, loss_att=123.557, acc=0.071, loss=131.002, backward_time=0.047, optim0_lr0=1.728e-05, train_time=0.270, grad_norm=16.531, clip=99.721, loss_scale=512.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:16:29,739 (trainer:732) INFO: 1epoch:train:5371-5728batch: iter_time=0.024, forward_time=0.104, loss_ctc=151.674, loss_att=125.362, acc=0.073, loss=133.255, backward_time=0.047, optim0_lr0=1.847e-05, train_time=0.274, grad_norm=16.514, clip=99.441, loss_scale=512.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:18:05,954 (trainer:732) INFO: 1epoch:train:5729-6086batch: iter_time=0.023, forward_time=0.102, loss_ctc=148.351, loss_att=121.680, acc=0.077, loss=129.681, backward_time=0.047, optim0_lr0=1.966e-05, train_time=0.269, grad_norm=16.646, clip=99.721, loss_scale=512.000, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:19:10,401 (preprocessor:336) WARNING: The length of the text output exceeds 100, which may cause OOM on the GPU.Please ensure that the data processing is correct and verify it.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:19:42,385 (trainer:732) INFO: 1epoch:train:6087-6444batch: iter_time=0.025, forward_time=0.101, loss_ctc=147.037, loss_att=119.534, acc=0.082, loss=127.785, backward_time=0.047, optim0_lr0=2.086e-05, train_time=0.269, grad_norm=16.936, clip=100.000, loss_scale=948.201, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:21:20,753 (trainer:732) INFO: 1epoch:train:6445-6802batch: iter_time=0.025, forward_time=0.104, loss_ctc=147.720, loss_att=119.081, acc=0.085, loss=127.672, backward_time=0.047, optim0_lr0=2.205e-05, train_time=0.275, grad_norm=19.001, clip=99.441, loss_scale=1.024e+03, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:22:56,927 (trainer:732) INFO: 1epoch:train:6803-7160batch: iter_time=0.022, forward_time=0.103, loss_ctc=150.074, loss_att=119.932, acc=0.087, loss=128.975, backward_time=0.047, optim0_lr0=2.324e-05, train_time=0.268, grad_norm=17.965, clip=99.162, loss_scale=1.024e+03, optim_step_time=0.027
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:24:07,139 (trainer:338) INFO: 1epoch results: [train] iter_time=0.025, forward_time=0.103, loss_ctc=223.614, loss_att=131.181, acc=0.057, loss=158.911, backward_time=0.047, optim0_lr0=1.191e-05, train_time=0.272, grad_norm=171.368, clip=99.790, loss_scale=404.617, optim_step_time=0.027, time=32 minutes and 27.77 seconds, total_count=7161, gpu_max_cached_mem_GB=26.719, [valid] loss_ctc=147.095, cer_ctc=0.957, loss_att=114.699, acc=0.094, cer=0.793, wer=1.000, loss=124.418, time=15.6 seconds, total_count=53, gpu_max_cached_mem_GB=30.113, [att_plot] time=53.98 seconds, total_count=0, gpu_max_cached_mem_GB=30.113
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:24:10,883 (trainer:386) INFO: The best model has been updated: valid.acc
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:24:10,883 (trainer:272) INFO: 2/100epoch started. Estimated time to finish: 2 days, 7 hours and 34 minutes
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:25:42,078 (trainer:732) INFO: 2epoch:train:1-358batch: iter_time=0.010, forward_time=0.099, loss_ctc=146.897, loss_att=116.455, acc=0.090, loss=125.588, backward_time=0.051, grad_norm=18.028, clip=100.000, loss_scale=1.024e+03, optim_step_time=0.027, optim0_lr0=2.444e-05, train_time=0.254
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:27:12,224 (trainer:732) INFO: 2epoch:train:359-716batch: iter_time=0.005, forward_time=0.100, loss_ctc=153.089, loss_att=120.195, acc=0.092, loss=130.063, backward_time=0.051, grad_norm=19.505, clip=100.000, loss_scale=1.024e+03, optim_step_time=0.027, optim0_lr0=2.564e-05, train_time=0.252
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:28:42,035 (trainer:732) INFO: 2epoch:train:717-1074batch: iter_time=0.004, forward_time=0.100, loss_ctc=150.953, loss_att=117.658, acc=0.096, loss=127.646, backward_time=0.052, grad_norm=18.515, clip=100.000, loss_scale=1.299e+03, optim_step_time=0.027, optim0_lr0=2.683e-05, train_time=0.251
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:30:13,348 (trainer:732) INFO: 2epoch:train:1075-1432batch: iter_time=0.007, forward_time=0.101, loss_ctc=150.922, loss_att=116.737, acc=0.097, loss=126.992, backward_time=0.052, grad_norm=21.376, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=2.802e-05, train_time=0.255
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:31:45,909 (trainer:732) INFO: 2epoch:train:1433-1790batch: iter_time=0.010, forward_time=0.101, loss_ctc=151.956, loss_att=116.632, acc=0.101, loss=127.229, backward_time=0.052, grad_norm=20.114, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=2.922e-05, train_time=0.258
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:33:17,481 (trainer:732) INFO: 2epoch:train:1791-2148batch: iter_time=0.009, forward_time=0.100, loss_ctc=146.070, loss_att=111.379, acc=0.103, loss=121.786, backward_time=0.053, grad_norm=21.288, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=3.041e-05, train_time=0.256
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:34:49,084 (trainer:732) INFO: 2epoch:train:2149-2506batch: iter_time=0.008, forward_time=0.100, loss_ctc=150.547, loss_att=113.894, acc=0.105, loss=124.890, backward_time=0.052, grad_norm=21.959, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=3.160e-05, train_time=0.256
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:36:09,811 (preprocessor:336) WARNING: The length of the text output exceeds 100, which may cause OOM on the GPU.Please ensure that the data processing is correct and verify it.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:36:22,088 (trainer:732) INFO: 2epoch:train:2507-2864batch: iter_time=0.013, forward_time=0.100, loss_ctc=146.740, loss_att=110.302, acc=0.108, loss=121.233, backward_time=0.052, grad_norm=23.392, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=3.279e-05, train_time=0.260
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:37:54,991 (trainer:732) INFO: 2epoch:train:2865-3222batch: iter_time=0.014, forward_time=0.099, loss_ctc=149.256, loss_att=111.455, acc=0.110, loss=122.795, backward_time=0.053, grad_norm=22.771, clip=100.000, loss_scale=3.444e+03, optim_step_time=0.027, optim0_lr0=3.399e-05, train_time=0.259
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:39:29,653 (trainer:732) INFO: 2epoch:train:3223-3580batch: iter_time=0.014, forward_time=0.101, loss_ctc=152.095, loss_att=112.960, acc=0.112, loss=124.700, backward_time=0.054, grad_norm=24.803, clip=100.000, loss_scale=4.096e+03, optim_step_time=0.027, optim0_lr0=3.518e-05, train_time=0.264
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:40:06,950 (preprocessor:336) WARNING: The length of the text output exceeds 100, which may cause OOM on the GPU.Please ensure that the data processing is correct and verify it.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:41:05,780 (trainer:732) INFO: 2epoch:train:3581-3938batch: iter_time=0.020, forward_time=0.100, loss_ctc=153.237, loss_att=113.222, acc=0.113, loss=125.227, backward_time=0.053, grad_norm=25.447, clip=100.000, loss_scale=4.096e+03, optim_step_time=0.027, optim0_lr0=3.637e-05, train_time=0.268
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:42:39,485 (trainer:732) INFO: 2epoch:train:3939-4296batch: iter_time=0.018, forward_time=0.098, loss_ctc=147.070, loss_att=108.142, acc=0.116, loss=119.820, backward_time=0.052, grad_norm=24.460, clip=100.000, loss_scale=4.096e+03, optim_step_time=0.027, optim0_lr0=3.757e-05, train_time=0.261
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:43:18,222 (preprocessor:336) WARNING: The length of the text output exceeds 100, which may cause OOM on the GPU.Please ensure that the data processing is correct and verify it.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:44:13,361 (trainer:732) INFO: 2epoch:train:4297-4654batch: iter_time=0.015, forward_time=0.100, loss_ctc=151.242, loss_att=110.422, acc=0.118, loss=122.668, backward_time=0.052, grad_norm=28.633, clip=100.000, loss_scale=4.096e+03, optim_step_time=0.027, optim0_lr0=3.876e-05, train_time=0.262
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:45:46,881 (trainer:732) INFO: 2epoch:train:4655-5012batch: iter_time=0.019, forward_time=0.098, loss_ctc=142.303, loss_att=103.666, acc=0.120, loss=115.257, backward_time=0.051, grad_norm=24.655, clip=100.000, loss_scale=4.485e+03, optim_step_time=0.027, optim0_lr0=3.995e-05, train_time=0.261
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:45:59,697 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:47:22,056 (trainer:732) INFO: 2epoch:train:5013-5370batch: iter_time=0.019, forward_time=0.100, loss_ctc=146.878, loss_att=106.419, acc=0.122, loss=118.557, backward_time=0.052, grad_norm=25.764, clip=100.000, loss_scale=4.635e+03, optim_step_time=0.027, optim0_lr0=4.115e-05, train_time=0.266
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:48:40,861 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:48:57,097 (trainer:732) INFO: 2epoch:train:5371-5728batch: iter_time=0.019, forward_time=0.100, loss_ctc=152.028, loss_att=109.557, acc=0.122, loss=122.298, backward_time=0.052, grad_norm=26.820, clip=100.000, loss_scale=3.740e+03, optim_step_time=0.027, optim0_lr0=4.234e-05, train_time=0.265
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:50:31,576 (trainer:732) INFO: 2epoch:train:5729-6086batch: iter_time=0.020, forward_time=0.099, loss_ctc=146.130, loss_att=104.878, acc=0.125, loss=117.254, backward_time=0.052, grad_norm=32.007, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=4.353e-05, train_time=0.264
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:52:06,515 (trainer:732) INFO: 2epoch:train:6087-6444batch: iter_time=0.020, forward_time=0.100, loss_ctc=146.222, loss_att=104.518, acc=0.127, loss=117.030, backward_time=0.052, grad_norm=29.694, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=4.472e-05, train_time=0.265
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:53:43,431 (trainer:732) INFO: 2epoch:train:6445-6802batch: iter_time=0.022, forward_time=0.101, loss_ctc=149.954, loss_att=106.829, acc=0.126, loss=119.766, backward_time=0.052, grad_norm=30.724, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=4.591e-05, train_time=0.270
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:55:21,190 (trainer:732) INFO: 2epoch:train:6803-7160batch: iter_time=0.028, forward_time=0.099, loss_ctc=144.162, loss_att=102.320, acc=0.129, loss=114.872, backward_time=0.052, grad_norm=25.842, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=4.711e-05, train_time=0.273
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:56:29,521 (trainer:338) INFO: 2epoch results: [train] iter_time=0.015, forward_time=0.100, loss_ctc=148.856, loss_att=110.836, acc=0.112, loss=122.242, backward_time=0.052, grad_norm=24.288, clip=100.000, loss_scale=2.723e+03, optim_step_time=0.027, optim0_lr0=3.578e-05, train_time=0.261, time=31 minutes and 11.1 seconds, total_count=14322, gpu_max_cached_mem_GB=30.113, [valid] loss_ctc=145.373, cer_ctc=0.936, loss_att=99.962, acc=0.139, cer=0.750, wer=1.000, loss=113.585, time=15.26 seconds, total_count=106, gpu_max_cached_mem_GB=30.113, [att_plot] time=52.27 seconds, total_count=0, gpu_max_cached_mem_GB=30.113
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:56:32,796 (trainer:386) INFO: The best model has been updated: valid.acc
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:56:32,797 (trainer:272) INFO: 3/100epoch started. Estimated time to finish: 2 days, 5 hours and 56 minutes
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:58:03,675 (trainer:732) INFO: 3epoch:train:1-358batch: iter_time=0.006, forward_time=0.100, loss_ctc=156.044, loss_att=110.136, acc=0.127, loss=123.909, backward_time=0.052, grad_norm=33.020, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=4.830e-05, train_time=0.254
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:58:41,793 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:59:20,227 (preprocessor:336) WARNING: The length of the text output exceeds 100, which may cause OOM on the GPU.Please ensure that the data processing is correct and verify it.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 17:59:34,162 (trainer:732) INFO: 3epoch:train:359-716batch: iter_time=0.008, forward_time=0.098, loss_ctc=145.890, loss_att=102.699, acc=0.130, loss=115.656, backward_time=0.052, grad_norm=32.750, clip=100.000, loss_scale=2.065e+03, optim_step_time=0.027, optim0_lr0=4.950e-05, train_time=0.253
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:01:04,326 (trainer:732) INFO: 3epoch:train:717-1074batch: iter_time=0.008, forward_time=0.098, loss_ctc=148.426, loss_att=104.155, acc=0.130, loss=117.437, backward_time=0.052, grad_norm=30.447, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=5.069e-05, train_time=0.252
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:02:34,948 (trainer:732) INFO: 3epoch:train:1075-1432batch: iter_time=0.007, forward_time=0.099, loss_ctc=149.702, loss_att=104.720, acc=0.132, loss=118.214, backward_time=0.053, grad_norm=27.374, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=5.188e-05, train_time=0.253
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:04:06,392 (trainer:732) INFO: 3epoch:train:1433-1790batch: iter_time=0.012, forward_time=0.098, loss_ctc=143.287, loss_att=99.981, acc=0.134, loss=112.973, backward_time=0.052, grad_norm=30.144, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=5.307e-05, train_time=0.255
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:05:36,934 (trainer:732) INFO: 3epoch:train:1791-2148batch: iter_time=0.009, forward_time=0.098, loss_ctc=147.641, loss_att=102.730, acc=0.134, loss=116.203, backward_time=0.052, grad_norm=32.581, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=5.427e-05, train_time=0.253
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:07:08,275 (trainer:732) INFO: 3epoch:train:2149-2506batch: iter_time=0.012, forward_time=0.098, loss_ctc=148.194, loss_att=102.766, acc=0.134, loss=116.394, backward_time=0.052, grad_norm=28.703, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=5.546e-05, train_time=0.255
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:08:40,090 (trainer:732) INFO: 3epoch:train:2507-2864batch: iter_time=0.014, forward_time=0.097, loss_ctc=143.604, loss_att=99.383, acc=0.135, loss=112.649, backward_time=0.052, grad_norm=32.159, clip=100.000, loss_scale=4.079e+03, optim_step_time=0.027, optim0_lr0=5.665e-05, train_time=0.256
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:09:57,253 (preprocessor:336) WARNING: The length of the text output exceeds 100, which may cause OOM on the GPU.Please ensure that the data processing is correct and verify it.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:10:12,546 (trainer:732) INFO: 3epoch:train:2865-3222batch: iter_time=0.012, forward_time=0.099, loss_ctc=152.945, loss_att=105.353, acc=0.135, loss=119.631, backward_time=0.052, grad_norm=30.932, clip=100.000, loss_scale=4.096e+03, optim_step_time=0.027, optim0_lr0=5.785e-05, train_time=0.258
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:11:46,066 (trainer:732) INFO: 3epoch:train:3223-3580batch: iter_time=0.021, forward_time=0.096, loss_ctc=143.083, loss_att=98.410, acc=0.139, loss=111.812, backward_time=0.052, grad_norm=26.177, clip=100.000, loss_scale=4.096e+03, optim_step_time=0.027, optim0_lr0=5.904e-05, train_time=0.261
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:12:29,695 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:12:50,798 (trainer:663) WARNING: The grad norm is inf. Skipping updating the model.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:13:20,870 (trainer:732) INFO: 3epoch:train:3581-3938batch: iter_time=0.013, forward_time=0.101, loss_ctc=155.241, loss_att=106.349, acc=0.136, loss=121.017, backward_time=0.052, grad_norm=35.155, clip=100.000, loss_scale=2.669e+03, optim_step_time=0.027, optim0_lr0=6.023e-05, train_time=0.265
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:14:22,178 (preprocessor:336) WARNING: The length of the text output exceeds 100, which may cause OOM on the GPU.Please ensure that the data processing is correct and verify it.
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:14:53,120 (trainer:732) INFO: 3epoch:train:3939-4296batch: iter_time=0.017, forward_time=0.096, loss_ctc=144.165, loss_att=98.495, acc=0.140, loss=112.196, backward_time=0.052, grad_norm=34.468, clip=100.000, loss_scale=1.024e+03, optim_step_time=0.027, optim0_lr0=6.142e-05, train_time=0.257
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:16:26,478 (trainer:732) INFO: 3epoch:train:4297-4654batch: iter_time=0.017, forward_time=0.098, loss_ctc=147.600, loss_att=100.696, acc=0.139, loss=114.768, backward_time=0.052, grad_norm=30.049, clip=100.000, loss_scale=1.024e+03, optim_step_time=0.027, optim0_lr0=6.261e-05, train_time=0.261
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:18:00,486 (trainer:732) INFO: 3epoch:train:4655-5012batch: iter_time=0.018, forward_time=0.098, loss_ctc=145.332, loss_att=98.965, acc=0.141, loss=112.875, backward_time=0.052, grad_norm=28.210, clip=100.000, loss_scale=1.024e+03, optim_step_time=0.027, optim0_lr0=6.381e-05, train_time=0.262
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:19:34,714 (trainer:732) INFO: 3epoch:train:5013-5370batch: iter_time=0.016, forward_time=0.099, loss_ctc=153.903, loss_att=104.304, acc=0.139, loss=119.184, backward_time=0.052, grad_norm=26.813, clip=100.000, loss_scale=1.024e+03, optim_step_time=0.027, optim0_lr0=6.500e-05, train_time=0.263
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:21:09,395 (trainer:732) INFO: 3epoch:train:5371-5728batch: iter_time=0.020, forward_time=0.098, loss_ctc=152.415, loss_att=103.090, acc=0.139, loss=117.887, backward_time=0.052, grad_norm=28.828, clip=100.000, loss_scale=1.024e+03, optim_step_time=0.027, optim0_lr0=6.619e-05, train_time=0.264
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:22:43,299 (trainer:732) INFO: 3epoch:train:5729-6086batch: iter_time=0.017, forward_time=0.098, loss_ctc=152.646, loss_att=103.069, acc=0.141, loss=117.942, backward_time=0.051, grad_norm=29.594, clip=100.000, loss_scale=1.768e+03, optim_step_time=0.027, optim0_lr0=6.739e-05, train_time=0.262
[mlxlabq1l19yow63f8475a-20230224051258-1mabjw-hfy9o5-worker] 2023-05-12 18:24:19,504 (trainer:732) INFO: 3epoch:train:6087-6444batch: iter_time=0.021, forward_time=0.100, loss_ctc=147.020, loss_att=98.934, acc=0.144, loss=113.359, backward_time=0.051, grad_norm=28.379, clip=100.000, loss_scale=2.048e+03, optim_step_time=0.027, optim0_lr0=6.858e-05, train_time=0.268