karthik committed on
Commit
4652ca8
1 Parent(s): 4bddd2d

Update Model

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. exp/asr_stats_raw_en_word/logdir/q/stats.log +1 -0
  2. exp/asr_stats_raw_en_word/logdir/q/stats.sh +29 -0
  3. exp/asr_stats_raw_en_word/logdir/stats.1.log +495 -0
  4. exp/asr_stats_raw_en_word/logdir/stats.1/config.yaml +231 -0
  5. exp/asr_stats_raw_en_word/logdir/stats.1/train/batch_keys +2 -0
  6. exp/asr_stats_raw_en_word/logdir/stats.1/train/feats_lengths_stats.npz +0 -0
  7. exp/asr_stats_raw_en_word/logdir/stats.1/train/feats_stats.npz +0 -0
  8. exp/asr_stats_raw_en_word/logdir/stats.1/train/speech_shape +191 -0
  9. exp/asr_stats_raw_en_word/logdir/stats.1/train/stats_keys +2 -0
  10. exp/asr_stats_raw_en_word/logdir/stats.1/train/text_shape +191 -0
  11. exp/asr_stats_raw_en_word/logdir/stats.1/valid/batch_keys +2 -0
  12. exp/asr_stats_raw_en_word/logdir/stats.1/valid/feats_lengths_stats.npz +0 -0
  13. exp/asr_stats_raw_en_word/logdir/stats.1/valid/feats_stats.npz +0 -0
  14. exp/asr_stats_raw_en_word/logdir/stats.1/valid/speech_shape +24 -0
  15. exp/asr_stats_raw_en_word/logdir/stats.1/valid/stats_keys +2 -0
  16. exp/asr_stats_raw_en_word/logdir/stats.1/valid/text_shape +24 -0
  17. exp/asr_stats_raw_en_word/logdir/stats.10.log +495 -0
  18. exp/asr_stats_raw_en_word/logdir/stats.10/config.yaml +231 -0
  19. exp/asr_stats_raw_en_word/logdir/stats.10/train/batch_keys +2 -0
  20. exp/asr_stats_raw_en_word/logdir/stats.10/train/feats_lengths_stats.npz +0 -0
  21. exp/asr_stats_raw_en_word/logdir/stats.10/train/feats_stats.npz +0 -0
  22. exp/asr_stats_raw_en_word/logdir/stats.10/train/speech_shape +191 -0
  23. exp/asr_stats_raw_en_word/logdir/stats.10/train/stats_keys +2 -0
  24. exp/asr_stats_raw_en_word/logdir/stats.10/train/text_shape +191 -0
  25. exp/asr_stats_raw_en_word/logdir/stats.10/valid/batch_keys +2 -0
  26. exp/asr_stats_raw_en_word/logdir/stats.10/valid/feats_lengths_stats.npz +0 -0
  27. exp/asr_stats_raw_en_word/logdir/stats.10/valid/feats_stats.npz +0 -0
  28. exp/asr_stats_raw_en_word/logdir/stats.10/valid/speech_shape +24 -0
  29. exp/asr_stats_raw_en_word/logdir/stats.10/valid/stats_keys +2 -0
  30. exp/asr_stats_raw_en_word/logdir/stats.10/valid/text_shape +24 -0
  31. exp/asr_stats_raw_en_word/logdir/stats.11.log +495 -0
  32. exp/asr_stats_raw_en_word/logdir/stats.11/config.yaml +231 -0
  33. exp/asr_stats_raw_en_word/logdir/stats.11/train/batch_keys +2 -0
  34. exp/asr_stats_raw_en_word/logdir/stats.11/train/feats_lengths_stats.npz +0 -0
  35. exp/asr_stats_raw_en_word/logdir/stats.11/train/feats_stats.npz +0 -0
  36. exp/asr_stats_raw_en_word/logdir/stats.11/train/speech_shape +191 -0
  37. exp/asr_stats_raw_en_word/logdir/stats.11/train/stats_keys +2 -0
  38. exp/asr_stats_raw_en_word/logdir/stats.11/train/text_shape +191 -0
  39. exp/asr_stats_raw_en_word/logdir/stats.11/valid/batch_keys +2 -0
  40. exp/asr_stats_raw_en_word/logdir/stats.11/valid/feats_lengths_stats.npz +0 -0
  41. exp/asr_stats_raw_en_word/logdir/stats.11/valid/feats_stats.npz +0 -0
  42. exp/asr_stats_raw_en_word/logdir/stats.11/valid/speech_shape +24 -0
  43. exp/asr_stats_raw_en_word/logdir/stats.11/valid/stats_keys +2 -0
  44. exp/asr_stats_raw_en_word/logdir/stats.11/valid/text_shape +24 -0
  45. exp/asr_stats_raw_en_word/logdir/stats.12.log +495 -0
  46. exp/asr_stats_raw_en_word/logdir/stats.12/config.yaml +231 -0
  47. exp/asr_stats_raw_en_word/logdir/stats.12/train/batch_keys +2 -0
  48. exp/asr_stats_raw_en_word/logdir/stats.12/train/feats_lengths_stats.npz +0 -0
  49. exp/asr_stats_raw_en_word/logdir/stats.12/train/feats_stats.npz +0 -0
  50. exp/asr_stats_raw_en_word/logdir/stats.12/train/speech_shape +191 -0
exp/asr_stats_raw_en_word/logdir/q/stats.log ADDED
@@ -0,0 +1 @@
 
 
1
+ Submitted batch job 5730432
exp/asr_stats_raw_en_word/logdir/q/stats.sh ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ cd /ocean/projects/cis210027p/ganesank/karthik_new/espnet/egs2/sinhala/asr1
3
+ . ./path.sh
4
+ ( echo '#' Running on `hostname`
5
+ echo '#' Started at `date`
6
+ set | grep SLURM | while read line; do echo "# $line"; done
7
+ echo -n '# '; cat <<EOF
8
+ python3 -m espnet2.bin.asr_train --collect_stats true --use_preprocessor true --bpemodel none --token_type word --token_list data/en_token_list/word/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train/text,text,text --valid_data_path_and_name_and_type dump/raw/valid/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/valid/text,text,text --train_shape_file exp/asr_stats_raw_en_word/logdir/train.${SLURM_ARRAY_TASK_ID}.scp --valid_shape_file exp/asr_stats_raw_en_word/logdir/valid.${SLURM_ARRAY_TASK_ID}.scp --output_dir exp/asr_stats_raw_en_word/logdir/stats.${SLURM_ARRAY_TASK_ID} --config conf/train_asr.yaml --frontend_conf fs=16k
9
+ EOF
10
+ ) >exp/asr_stats_raw_en_word/logdir/stats.$SLURM_ARRAY_TASK_ID.log
11
+ if [ "$CUDA_VISIBLE_DEVICES" == "NoDevFiles" ]; then
12
+ ( echo CUDA_VISIBLE_DEVICES set to NoDevFiles, unsetting it...
13
+ )>>exp/asr_stats_raw_en_word/logdir/stats.$SLURM_ARRAY_TASK_ID.log
14
+ unset CUDA_VISIBLE_DEVICES
15
+ fi
16
+ time1=`date +"%s"`
17
+ ( python3 -m espnet2.bin.asr_train --collect_stats true --use_preprocessor true --bpemodel none --token_type word --token_list data/en_token_list/word/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train/text,text,text --valid_data_path_and_name_and_type dump/raw/valid/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/valid/text,text,text --train_shape_file exp/asr_stats_raw_en_word/logdir/train.${SLURM_ARRAY_TASK_ID}.scp --valid_shape_file exp/asr_stats_raw_en_word/logdir/valid.${SLURM_ARRAY_TASK_ID}.scp --output_dir exp/asr_stats_raw_en_word/logdir/stats.${SLURM_ARRAY_TASK_ID} --config conf/train_asr.yaml --frontend_conf fs=16k ) &>>exp/asr_stats_raw_en_word/logdir/stats.$SLURM_ARRAY_TASK_ID.log
18
+ ret=$?
19
+ sync || true
20
+ time2=`date +"%s"`
21
+ echo '#' Accounting: begin_time=$time1 >>exp/asr_stats_raw_en_word/logdir/stats.$SLURM_ARRAY_TASK_ID.log
22
+ echo '#' Accounting: end_time=$time2 >>exp/asr_stats_raw_en_word/logdir/stats.$SLURM_ARRAY_TASK_ID.log
23
+ echo '#' Accounting: time=$(($time2-$time1)) threads=1 >>exp/asr_stats_raw_en_word/logdir/stats.$SLURM_ARRAY_TASK_ID.log
24
+ echo '#' Finished at `date` with status $ret >>exp/asr_stats_raw_en_word/logdir/stats.$SLURM_ARRAY_TASK_ID.log
25
+ [ $ret -eq 137 ] && exit 100;
26
+ touch exp/asr_stats_raw_en_word/logdir/q/done.1797611.$SLURM_ARRAY_TASK_ID
27
+ exit $[$ret ? 1 : 0]
28
+ ## submitted with:
29
+ # sbatch --export=PATH --time 10:00:00 -p RM-shared --open-mode=append -e exp/asr_stats_raw_en_word/logdir/q/stats.log -o exp/asr_stats_raw_en_word/logdir/q/stats.log --array 1-32 /ocean/projects/cis210027p/ganesank/karthik_new/espnet/egs2/sinhala/asr1/exp/asr_stats_raw_en_word/logdir/q/stats.sh >>exp/asr_stats_raw_en_word/logdir/q/stats.log 2>&1
exp/asr_stats_raw_en_word/logdir/stats.1.log ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Running on r041.ib.bridges2.psc.edu
2
+ # Started at Tue Dec 21 22:23:13 EST 2021
3
+ # SLURMD_NODENAME=r041
4
+ # SLURM_ARRAY_JOB_ID=5730432
5
+ # SLURM_ARRAY_TASK_COUNT=32
6
+ # SLURM_ARRAY_TASK_ID=1
7
+ # SLURM_ARRAY_TASK_MAX=32
8
+ # SLURM_ARRAY_TASK_MIN=1
9
+ # SLURM_ARRAY_TASK_STEP=1
10
+ # SLURM_CLUSTER_NAME=bridges2
11
+ # SLURM_CONF=/var/spool/slurm/d/conf-cache/slurm.conf
12
+ # SLURM_CPUS_ON_NODE=1
13
+ # SLURM_EXPORT_ENV=PATH
14
+ # SLURM_GET_USER_ENV=1
15
+ # SLURM_GTIDS=0
16
+ # SLURM_JOBID=5730448
17
+ # SLURM_JOB_ACCOUNT=cis210027p
18
+ # SLURM_JOB_CPUS_PER_NODE=1
19
+ # SLURM_JOB_GID=24886
20
+ # SLURM_JOB_ID=5730448
21
+ # SLURM_JOB_NAME=stats.sh
22
+ # SLURM_JOB_NODELIST=r041
23
+ # SLURM_JOB_NUM_NODES=1
24
+ # SLURM_JOB_PARTITION=RM-shared
25
+ # SLURM_JOB_QOS=rm
26
+ # SLURM_JOB_UID=82326
27
+ # SLURM_JOB_USER=ganesank
28
+ # SLURM_LOCALID=0
29
+ # SLURM_MEM_PER_CPU=2000
30
+ # SLURM_NNODES=1
31
+ # SLURM_NODEID=0
32
+ # SLURM_NODELIST=r041
33
+ # SLURM_NODE_ALIASES='(null)'
34
+ # SLURM_OPEN_MODE=a
35
+ # SLURM_PRIO_PROCESS=0
36
+ # SLURM_PROCID=0
37
+ # SLURM_SUBMIT_DIR=/ocean/projects/cis210027p/ganesank/karthik_new/espnet/egs2/sinhala/asr1
38
+ # SLURM_SUBMIT_HOST=br012.ib.bridges2.psc.edu
39
+ # SLURM_TASKS_PER_NODE=1
40
+ # SLURM_TASK_PID=98449
41
+ # SLURM_TOPOLOGY_ADDR=r041
42
+ # SLURM_TOPOLOGY_ADDR_PATTERN=node
43
+ # SLURM_WORKING_CLUSTER=bridges2:br003:6814:9216:109
44
+ # python3 -m espnet2.bin.asr_train --collect_stats true --use_preprocessor true --bpemodel none --token_type word --token_list data/en_token_list/word/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train/text,text,text --valid_data_path_and_name_and_type dump/raw/valid/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/valid/text,text,text --train_shape_file exp/asr_stats_raw_en_word/logdir/train.1.scp --valid_shape_file exp/asr_stats_raw_en_word/logdir/valid.1.scp --output_dir exp/asr_stats_raw_en_word/logdir/stats.1 --config conf/train_asr.yaml --frontend_conf fs=16k
45
+ /ocean/projects/cis210027p/ganesank/karthik_new/espnet/tools/venv/bin/python3 /ocean/projects/cis210027p/ganesank/karthik_new/espnet/espnet2/bin/asr_train.py --collect_stats true --use_preprocessor true --bpemodel none --token_type word --token_list data/en_token_list/word/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train/text,text,text --valid_data_path_and_name_and_type dump/raw/valid/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/valid/text,text,text --train_shape_file exp/asr_stats_raw_en_word/logdir/train.1.scp --valid_shape_file exp/asr_stats_raw_en_word/logdir/valid.1.scp --output_dir exp/asr_stats_raw_en_word/logdir/stats.1 --config conf/train_asr.yaml --frontend_conf fs=16k
46
+ [r041] 2021-12-21 22:24:17,989 (asr:382) INFO: Vocabulary size: 40
47
+ [r041] 2021-12-21 22:24:18,946 (abs_task:1132) INFO: pytorch.version=1.8.1+cu102, cuda.available=False, cudnn.version=7605, cudnn.benchmark=False, cudnn.deterministic=True
48
+ [r041] 2021-12-21 22:24:18,952 (abs_task:1133) INFO: Model structure:
49
+ ESPnetASRModel(
50
+ (frontend): DefaultFrontend(
51
+ (stft): Stft(n_fft=512, win_length=512, hop_length=128, center=True, normalized=False, onesided=True)
52
+ (frontend): Frontend()
53
+ (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
54
+ )
55
+ (specaug): SpecAug(
56
+ (time_warp): TimeWarp(window=5, mode=bicubic)
57
+ (freq_mask): MaskAlongAxis(mask_width_range=[0, 30], num_mask=2, axis=freq)
58
+ (time_mask): MaskAlongAxis(mask_width_range=[0, 40], num_mask=2, axis=time)
59
+ )
60
+ (normalize): UtteranceMVN(norm_means=True, norm_vars=False)
61
+ (encoder): TransformerEncoder(
62
+ (embed): Conv2dSubsampling(
63
+ (conv): Sequential(
64
+ (0): Conv2d(1, 256, kernel_size=(3, 3), stride=(2, 2))
65
+ (1): ReLU()
66
+ (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2))
67
+ (3): ReLU()
68
+ )
69
+ (out): Sequential(
70
+ (0): Linear(in_features=4864, out_features=256, bias=True)
71
+ (1): PositionalEncoding(
72
+ (dropout): Dropout(p=0.1, inplace=False)
73
+ )
74
+ )
75
+ )
76
+ (encoders): MultiSequential(
77
+ (0): EncoderLayer(
78
+ (self_attn): MultiHeadedAttention(
79
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
80
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
81
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
82
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
83
+ (dropout): Dropout(p=0.0, inplace=False)
84
+ )
85
+ (feed_forward): PositionwiseFeedForward(
86
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
87
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
88
+ (dropout): Dropout(p=0.1, inplace=False)
89
+ (activation): ReLU()
90
+ )
91
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
92
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
93
+ (dropout): Dropout(p=0.1, inplace=False)
94
+ )
95
+ (1): EncoderLayer(
96
+ (self_attn): MultiHeadedAttention(
97
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
98
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
99
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
100
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
101
+ (dropout): Dropout(p=0.0, inplace=False)
102
+ )
103
+ (feed_forward): PositionwiseFeedForward(
104
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
105
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
106
+ (dropout): Dropout(p=0.1, inplace=False)
107
+ (activation): ReLU()
108
+ )
109
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
110
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
111
+ (dropout): Dropout(p=0.1, inplace=False)
112
+ )
113
+ (2): EncoderLayer(
114
+ (self_attn): MultiHeadedAttention(
115
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
116
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
117
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
118
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
119
+ (dropout): Dropout(p=0.0, inplace=False)
120
+ )
121
+ (feed_forward): PositionwiseFeedForward(
122
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
123
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
124
+ (dropout): Dropout(p=0.1, inplace=False)
125
+ (activation): ReLU()
126
+ )
127
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
128
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
129
+ (dropout): Dropout(p=0.1, inplace=False)
130
+ )
131
+ (3): EncoderLayer(
132
+ (self_attn): MultiHeadedAttention(
133
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
134
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
135
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
136
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
137
+ (dropout): Dropout(p=0.0, inplace=False)
138
+ )
139
+ (feed_forward): PositionwiseFeedForward(
140
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
141
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
142
+ (dropout): Dropout(p=0.1, inplace=False)
143
+ (activation): ReLU()
144
+ )
145
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
146
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
147
+ (dropout): Dropout(p=0.1, inplace=False)
148
+ )
149
+ (4): EncoderLayer(
150
+ (self_attn): MultiHeadedAttention(
151
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
152
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
153
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
154
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
155
+ (dropout): Dropout(p=0.0, inplace=False)
156
+ )
157
+ (feed_forward): PositionwiseFeedForward(
158
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
159
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
160
+ (dropout): Dropout(p=0.1, inplace=False)
161
+ (activation): ReLU()
162
+ )
163
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
164
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
165
+ (dropout): Dropout(p=0.1, inplace=False)
166
+ )
167
+ (5): EncoderLayer(
168
+ (self_attn): MultiHeadedAttention(
169
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
170
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
171
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
172
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
173
+ (dropout): Dropout(p=0.0, inplace=False)
174
+ )
175
+ (feed_forward): PositionwiseFeedForward(
176
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
177
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
178
+ (dropout): Dropout(p=0.1, inplace=False)
179
+ (activation): ReLU()
180
+ )
181
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
182
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
183
+ (dropout): Dropout(p=0.1, inplace=False)
184
+ )
185
+ (6): EncoderLayer(
186
+ (self_attn): MultiHeadedAttention(
187
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
188
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
189
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
190
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
191
+ (dropout): Dropout(p=0.0, inplace=False)
192
+ )
193
+ (feed_forward): PositionwiseFeedForward(
194
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
195
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
196
+ (dropout): Dropout(p=0.1, inplace=False)
197
+ (activation): ReLU()
198
+ )
199
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
200
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
201
+ (dropout): Dropout(p=0.1, inplace=False)
202
+ )
203
+ (7): EncoderLayer(
204
+ (self_attn): MultiHeadedAttention(
205
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
206
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
207
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
208
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
209
+ (dropout): Dropout(p=0.0, inplace=False)
210
+ )
211
+ (feed_forward): PositionwiseFeedForward(
212
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
213
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
214
+ (dropout): Dropout(p=0.1, inplace=False)
215
+ (activation): ReLU()
216
+ )
217
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
218
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
219
+ (dropout): Dropout(p=0.1, inplace=False)
220
+ )
221
+ (8): EncoderLayer(
222
+ (self_attn): MultiHeadedAttention(
223
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
224
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
225
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
226
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
227
+ (dropout): Dropout(p=0.0, inplace=False)
228
+ )
229
+ (feed_forward): PositionwiseFeedForward(
230
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
231
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
232
+ (dropout): Dropout(p=0.1, inplace=False)
233
+ (activation): ReLU()
234
+ )
235
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
236
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
237
+ (dropout): Dropout(p=0.1, inplace=False)
238
+ )
239
+ (9): EncoderLayer(
240
+ (self_attn): MultiHeadedAttention(
241
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
242
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
243
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
244
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
245
+ (dropout): Dropout(p=0.0, inplace=False)
246
+ )
247
+ (feed_forward): PositionwiseFeedForward(
248
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
249
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
250
+ (dropout): Dropout(p=0.1, inplace=False)
251
+ (activation): ReLU()
252
+ )
253
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
254
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
255
+ (dropout): Dropout(p=0.1, inplace=False)
256
+ )
257
+ (10): EncoderLayer(
258
+ (self_attn): MultiHeadedAttention(
259
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
260
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
261
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
262
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
263
+ (dropout): Dropout(p=0.0, inplace=False)
264
+ )
265
+ (feed_forward): PositionwiseFeedForward(
266
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
267
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
268
+ (dropout): Dropout(p=0.1, inplace=False)
269
+ (activation): ReLU()
270
+ )
271
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
272
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
273
+ (dropout): Dropout(p=0.1, inplace=False)
274
+ )
275
+ (11): EncoderLayer(
276
+ (self_attn): MultiHeadedAttention(
277
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
278
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
279
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
280
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
281
+ (dropout): Dropout(p=0.0, inplace=False)
282
+ )
283
+ (feed_forward): PositionwiseFeedForward(
284
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
285
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
286
+ (dropout): Dropout(p=0.1, inplace=False)
287
+ (activation): ReLU()
288
+ )
289
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
290
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
291
+ (dropout): Dropout(p=0.1, inplace=False)
292
+ )
293
+ )
294
+ (after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
295
+ )
296
+ (decoder): TransformerDecoder(
297
+ (embed): Sequential(
298
+ (0): Embedding(40, 256)
299
+ (1): PositionalEncoding(
300
+ (dropout): Dropout(p=0.1, inplace=False)
301
+ )
302
+ )
303
+ (after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
304
+ (output_layer): Linear(in_features=256, out_features=40, bias=True)
305
+ (decoders): MultiSequential(
306
+ (0): DecoderLayer(
307
+ (self_attn): MultiHeadedAttention(
308
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
309
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
310
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
311
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
312
+ (dropout): Dropout(p=0.0, inplace=False)
313
+ )
314
+ (src_attn): MultiHeadedAttention(
315
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
316
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
317
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
318
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
319
+ (dropout): Dropout(p=0.0, inplace=False)
320
+ )
321
+ (feed_forward): PositionwiseFeedForward(
322
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
323
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
324
+ (dropout): Dropout(p=0.1, inplace=False)
325
+ (activation): ReLU()
326
+ )
327
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
328
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
329
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
330
+ (dropout): Dropout(p=0.1, inplace=False)
331
+ )
332
+ (1): DecoderLayer(
333
+ (self_attn): MultiHeadedAttention(
334
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
335
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
336
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
337
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
338
+ (dropout): Dropout(p=0.0, inplace=False)
339
+ )
340
+ (src_attn): MultiHeadedAttention(
341
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
342
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
343
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
344
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
345
+ (dropout): Dropout(p=0.0, inplace=False)
346
+ )
347
+ (feed_forward): PositionwiseFeedForward(
348
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
349
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
350
+ (dropout): Dropout(p=0.1, inplace=False)
351
+ (activation): ReLU()
352
+ )
353
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
354
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
355
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
356
+ (dropout): Dropout(p=0.1, inplace=False)
357
+ )
358
+ (2): DecoderLayer(
359
+ (self_attn): MultiHeadedAttention(
360
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
361
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
362
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
363
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
364
+ (dropout): Dropout(p=0.0, inplace=False)
365
+ )
366
+ (src_attn): MultiHeadedAttention(
367
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
368
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
369
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
370
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
371
+ (dropout): Dropout(p=0.0, inplace=False)
372
+ )
373
+ (feed_forward): PositionwiseFeedForward(
374
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
375
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
376
+ (dropout): Dropout(p=0.1, inplace=False)
377
+ (activation): ReLU()
378
+ )
379
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
380
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
381
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
382
+ (dropout): Dropout(p=0.1, inplace=False)
383
+ )
384
+ (3): DecoderLayer(
385
+ (self_attn): MultiHeadedAttention(
386
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
387
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
388
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
389
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
390
+ (dropout): Dropout(p=0.0, inplace=False)
391
+ )
392
+ (src_attn): MultiHeadedAttention(
393
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
394
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
395
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
396
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
397
+ (dropout): Dropout(p=0.0, inplace=False)
398
+ )
399
+ (feed_forward): PositionwiseFeedForward(
400
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
401
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
402
+ (dropout): Dropout(p=0.1, inplace=False)
403
+ (activation): ReLU()
404
+ )
405
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
406
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
407
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
408
+ (dropout): Dropout(p=0.1, inplace=False)
409
+ )
410
+ (4): DecoderLayer(
411
+ (self_attn): MultiHeadedAttention(
412
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
413
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
414
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
415
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
416
+ (dropout): Dropout(p=0.0, inplace=False)
417
+ )
418
+ (src_attn): MultiHeadedAttention(
419
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
420
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
421
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
422
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
423
+ (dropout): Dropout(p=0.0, inplace=False)
424
+ )
425
+ (feed_forward): PositionwiseFeedForward(
426
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
427
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
428
+ (dropout): Dropout(p=0.1, inplace=False)
429
+ (activation): ReLU()
430
+ )
431
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
432
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
433
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
434
+ (dropout): Dropout(p=0.1, inplace=False)
435
+ )
436
+ (5): DecoderLayer(
437
+ (self_attn): MultiHeadedAttention(
438
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
439
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
440
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
441
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
442
+ (dropout): Dropout(p=0.0, inplace=False)
443
+ )
444
+ (src_attn): MultiHeadedAttention(
445
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
446
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
447
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
448
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
449
+ (dropout): Dropout(p=0.0, inplace=False)
450
+ )
451
+ (feed_forward): PositionwiseFeedForward(
452
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
453
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
454
+ (dropout): Dropout(p=0.1, inplace=False)
455
+ (activation): ReLU()
456
+ )
457
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
458
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
459
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
460
+ (dropout): Dropout(p=0.1, inplace=False)
461
+ )
462
+ )
463
+ )
464
+ (ctc): CTC(
465
+ (ctc_lo): Linear(in_features=256, out_features=40, bias=True)
466
+ (ctc_loss): CTCLoss()
467
+ )
468
+ (criterion_att): LabelSmoothingLoss(
469
+ (criterion): KLDivLoss()
470
+ )
471
+ )
472
+
473
+ Model summary:
474
+ Class Name: ESPnetASRModel
475
+ Total Number of model parameters: 27.12 M
476
+ Number of trainable parameters: 27.12 M (100.0%)
477
+ Size: 108.49 MB
478
+ Type: torch.float32
479
+ [r041] 2021-12-21 22:24:18,952 (abs_task:1136) INFO: Optimizer:
480
+ Adam (
481
+ Parameter Group 0
482
+ amsgrad: False
483
+ betas: (0.9, 0.999)
484
+ eps: 1e-08
485
+ initial_lr: 0.0002
486
+ lr: 8e-09
487
+ weight_decay: 0
488
+ )
489
+ [r041] 2021-12-21 22:24:18,952 (abs_task:1137) INFO: Scheduler: WarmupLR(warmup_steps=25000)
490
+ [r041] 2021-12-21 22:24:18,954 (abs_task:1146) INFO: Saving the configuration in exp/asr_stats_raw_en_word/logdir/stats.1/config.yaml
491
+ [r041] 2021-12-21 22:24:18,966 (abs_task:1157) INFO: Namespace(config='conf/train_asr.yaml', print_config=False, log_level='INFO', dry_run=False, iterator_type='sequence', output_dir='exp/asr_stats_raw_en_word/logdir/stats.1', ngpu=0, seed=0, num_workers=1, num_att_plot=3, dist_backend='nccl', dist_init_method='env://', dist_world_size=None, dist_rank=None, local_rank=None, dist_master_addr=None, dist_master_port=None, dist_launcher=None, multiprocessing_distributed=False, unused_parameters=False, sharded_ddp=False, cudnn_enabled=True, cudnn_benchmark=False, cudnn_deterministic=True, collect_stats=True, write_collected_feats=False, max_epoch=50, patience=None, val_scheduler_criterion=('valid', 'loss'), early_stopping_criterion=('valid', 'loss', 'min'), best_model_criterion=[('train', 'loss', 'min'), ('valid', 'loss', 'min'), ('train', 'acc', 'max'), ('valid', 'acc', 'max')], keep_nbest_models=5, grad_clip=5.0, grad_clip_type=2.0, grad_noise=False, accum_grad=1, no_forward_run=False, resume=False, train_dtype='float32', use_amp=False, log_interval=None, use_tensorboard=True, use_wandb=False, wandb_project=None, wandb_id=None, wandb_entity=None, wandb_name=None, wandb_model_log_interval=-1, detect_anomaly=False, pretrain_path=None, init_param=[], ignore_init_mismatch=False, freeze_param=[], num_iters_per_epoch=None, batch_size=20, valid_batch_size=None, batch_bins=1000000, valid_batch_bins=None, train_shape_file=['exp/asr_stats_raw_en_word/logdir/train.1.scp'], valid_shape_file=['exp/asr_stats_raw_en_word/logdir/valid.1.scp'], batch_type='folded', valid_batch_type=None, fold_length=[], sort_in_batch='descending', sort_batch='descending', multiple_iterator=False, chunk_length=500, chunk_shift_ratio=0.5, num_cache_chunks=1024, train_data_path_and_name_and_type=[('dump/raw/train/wav.scp', 'speech', 'sound'), ('dump/raw/train/text', 'text', 'text')], valid_data_path_and_name_and_type=[('dump/raw/valid/wav.scp', 'speech', 'sound'), ('dump/raw/valid/text', 'text', 
'text')], allow_variable_data_keys=False, max_cache_size=0.0, max_cache_fd=32, valid_max_cache_size=None, optim='adam', optim_conf={'lr': 0.0002}, scheduler='warmuplr', scheduler_conf={'warmup_steps': 25000}, token_list=['<blank>', '<unk>', '්', 'න', 'ම', 'ක', 'ල', 'ි', 'ු', 'ග', 'ේ', 'ර', 'ත', 'ද', 'ව', 'ට', 'ඕ', 'ී', 'ප', 'ය', 'ෙ', 'ස', 'ණ', 'ා', 'ැ', 'RequestAcc.balance', 'Moneywithdraw', 'Moneydeposit', 'Moneytransfer', 'Billpayments', 'බ', 'ඉ', 'ශ', 'ෂ', 'ඩ', 'Creditcardpayments', 'එ', '\u200d', 'හ', '<sos/eos>'], init=None, input_size=None, ctc_conf={'dropout_rate': 0.0, 'ctc_type': 'builtin', 'reduce': True, 'ignore_nan_grad': True}, model_conf={'ctc_weight': 0.5, 'ignore_id': -1, 'lsm_weight': 0.0, 'length_normalized_loss': False, 'report_cer': True, 'report_wer': True, 'sym_space': '<space>', 'sym_blank': '<blank>', 'extract_feats_in_collect_stats': True}, use_preprocessor=True, token_type='word', bpemodel=None, non_linguistic_symbols=None, cleaner=None, g2p=None, speech_volume_normalize=None, rir_scp=None, rir_apply_prob=1.0, noise_scp=None, noise_apply_prob=1.0, noise_db_range='13_15', frontend='default', frontend_conf={'fs': '16k'}, specaug='specaug', specaug_conf={'apply_time_warp': True, 'time_warp_window': 5, 'time_warp_mode': 'bicubic', 'apply_freq_mask': True, 'freq_mask_width_range': [0, 30], 'num_freq_mask': 2, 'apply_time_mask': True, 'time_mask_width_range': [0, 40], 'num_time_mask': 2}, normalize='utterance_mvn', normalize_conf={}, preencoder=None, preencoder_conf={}, encoder='transformer', encoder_conf={'output_size': 256, 'attention_heads': 4, 'linear_units': 2048, 'num_blocks': 12, 'dropout_rate': 0.1, 'positional_dropout_rate': 0.1, 'attention_dropout_rate': 0.0, 'input_layer': 'conv2d', 'normalize_before': True}, postencoder=None, postencoder_conf={}, decoder='transformer', decoder_conf={'attention_heads': 4, 'linear_units': 2048, 'num_blocks': 6, 'dropout_rate': 0.1, 'positional_dropout_rate': 0.1, 'self_attention_dropout_rate': 0.0, 
'src_attention_dropout_rate': 0.0}, required=['output_dir', 'token_list'], version='0.10.3a3', distributed=False)
492
+ # Accounting: begin_time=1640143393
493
+ # Accounting: end_time=1640143471
494
+ # Accounting: time=78 threads=1
495
+ # Finished at Tue Dec 21 22:24:31 EST 2021 with status 0
exp/asr_stats_raw_en_word/logdir/stats.1/config.yaml ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_stats_raw_en_word/logdir/stats.1
7
+ ngpu: 0
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: null
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: true
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - train
38
+ - loss
39
+ - min
40
+ - - valid
41
+ - loss
42
+ - min
43
+ - - train
44
+ - acc
45
+ - max
46
+ - - valid
47
+ - acc
48
+ - max
49
+ keep_nbest_models: 5
50
+ grad_clip: 5.0
51
+ grad_clip_type: 2.0
52
+ grad_noise: false
53
+ accum_grad: 1
54
+ no_forward_run: false
55
+ resume: false
56
+ train_dtype: float32
57
+ use_amp: false
58
+ log_interval: null
59
+ use_tensorboard: true
60
+ use_wandb: false
61
+ wandb_project: null
62
+ wandb_id: null
63
+ wandb_entity: null
64
+ wandb_name: null
65
+ wandb_model_log_interval: -1
66
+ detect_anomaly: false
67
+ pretrain_path: null
68
+ init_param: []
69
+ ignore_init_mismatch: false
70
+ freeze_param: []
71
+ num_iters_per_epoch: null
72
+ batch_size: 20
73
+ valid_batch_size: null
74
+ batch_bins: 1000000
75
+ valid_batch_bins: null
76
+ train_shape_file:
77
+ - exp/asr_stats_raw_en_word/logdir/train.1.scp
78
+ valid_shape_file:
79
+ - exp/asr_stats_raw_en_word/logdir/valid.1.scp
80
+ batch_type: folded
81
+ valid_batch_type: null
82
+ fold_length: []
83
+ sort_in_batch: descending
84
+ sort_batch: descending
85
+ multiple_iterator: false
86
+ chunk_length: 500
87
+ chunk_shift_ratio: 0.5
88
+ num_cache_chunks: 1024
89
+ train_data_path_and_name_and_type:
90
+ - - dump/raw/train/wav.scp
91
+ - speech
92
+ - sound
93
+ - - dump/raw/train/text
94
+ - text
95
+ - text
96
+ valid_data_path_and_name_and_type:
97
+ - - dump/raw/valid/wav.scp
98
+ - speech
99
+ - sound
100
+ - - dump/raw/valid/text
101
+ - text
102
+ - text
103
+ allow_variable_data_keys: false
104
+ max_cache_size: 0.0
105
+ max_cache_fd: 32
106
+ valid_max_cache_size: null
107
+ optim: adam
108
+ optim_conf:
109
+ lr: 0.0002
110
+ scheduler: warmuplr
111
+ scheduler_conf:
112
+ warmup_steps: 25000
113
+ token_list:
114
+ - <blank>
115
+ - <unk>
116
+ - ්
117
+ - න
118
+ - ම
119
+ - ක
120
+ - ල
121
+ - ි
122
+ - ු
123
+ - ග
124
+ - ේ
125
+ - ර
126
+ - ත
127
+ - ද
128
+ - ව
129
+ - ට
130
+ - ඕ
131
+ - ී
132
+ - ප
133
+ - ය
134
+ - ෙ
135
+ - ස
136
+ - ණ
137
+ - ා
138
+ - ැ
139
+ - RequestAcc.balance
140
+ - Moneywithdraw
141
+ - Moneydeposit
142
+ - Moneytransfer
143
+ - Billpayments
144
+ - බ
145
+ - ඉ
146
+ - ශ
147
+ - ෂ
148
+ - ඩ
149
+ - Creditcardpayments
150
+ - එ
151
+ - ‍
152
+ - හ
153
+ - <sos/eos>
154
+ init: null
155
+ input_size: null
156
+ ctc_conf:
157
+ dropout_rate: 0.0
158
+ ctc_type: builtin
159
+ reduce: true
160
+ ignore_nan_grad: true
161
+ model_conf:
162
+ ctc_weight: 0.5
163
+ ignore_id: -1
164
+ lsm_weight: 0.0
165
+ length_normalized_loss: false
166
+ report_cer: true
167
+ report_wer: true
168
+ sym_space: <space>
169
+ sym_blank: <blank>
170
+ extract_feats_in_collect_stats: true
171
+ use_preprocessor: true
172
+ token_type: word
173
+ bpemodel: null
174
+ non_linguistic_symbols: null
175
+ cleaner: null
176
+ g2p: null
177
+ speech_volume_normalize: null
178
+ rir_scp: null
179
+ rir_apply_prob: 1.0
180
+ noise_scp: null
181
+ noise_apply_prob: 1.0
182
+ noise_db_range: '13_15'
183
+ frontend: default
184
+ frontend_conf:
185
+ fs: 16k
186
+ specaug: specaug
187
+ specaug_conf:
188
+ apply_time_warp: true
189
+ time_warp_window: 5
190
+ time_warp_mode: bicubic
191
+ apply_freq_mask: true
192
+ freq_mask_width_range:
193
+ - 0
194
+ - 30
195
+ num_freq_mask: 2
196
+ apply_time_mask: true
197
+ time_mask_width_range:
198
+ - 0
199
+ - 40
200
+ num_time_mask: 2
201
+ normalize: utterance_mvn
202
+ normalize_conf: {}
203
+ preencoder: null
204
+ preencoder_conf: {}
205
+ encoder: transformer
206
+ encoder_conf:
207
+ output_size: 256
208
+ attention_heads: 4
209
+ linear_units: 2048
210
+ num_blocks: 12
211
+ dropout_rate: 0.1
212
+ positional_dropout_rate: 0.1
213
+ attention_dropout_rate: 0.0
214
+ input_layer: conv2d
215
+ normalize_before: true
216
+ postencoder: null
217
+ postencoder_conf: {}
218
+ decoder: transformer
219
+ decoder_conf:
220
+ attention_heads: 4
221
+ linear_units: 2048
222
+ num_blocks: 6
223
+ dropout_rate: 0.1
224
+ positional_dropout_rate: 0.1
225
+ self_attention_dropout_rate: 0.0
226
+ src_attention_dropout_rate: 0.0
227
+ required:
228
+ - output_dir
229
+ - token_list
230
+ version: 0.10.3a3
231
+ distributed: false
exp/asr_stats_raw_en_word/logdir/stats.1/train/batch_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ speech
2
+ text
exp/asr_stats_raw_en_word/logdir/stats.1/train/feats_lengths_stats.npz ADDED
Binary file (778 Bytes). View file
 
exp/asr_stats_raw_en_word/logdir/stats.1/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/asr_stats_raw_en_word/logdir/stats.1/train/speech_shape ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1520836294077_1_1.wav 35520
2
+ wavs_audio1520836461764_1_8.wav 48960
3
+ wavs_audio1520837122917_1_1.wav 40320
4
+ wavs_audio1520840663051_1_1.wav 34560
5
+ wavs_audio1520841304669_1_1.wav 32640
6
+ wavs_audio1520842275873_6_2.wav 44160
7
+ wavs_audio1520843492200_2_5.wav 57600
8
+ wavs_audio1520843508056_2_7.wav 46080
9
+ wavs_audio1520872925842_2_2.wav 99840
10
+ wavs_audio1521963566571_4_4.wav 53760
11
+ wavs_audio1521963584328_4_5.wav 50880
12
+ wavs_audio1521963604100_4_1.wav 44160
13
+ wavs_audio1521963620973_6_3.wav 43200
14
+ wavs_audio1521965219757_1_1.wav 111360
15
+ wavs_audio1521967020188_1_1.wav 57344
16
+ wavs_audio1521967072918_1_3.wav 52776
17
+ wavs_audio1521967088867_1_4.wav 55655
18
+ wavs_audio1521967143467_4_4.wav 37358
19
+ wavs_audio1521967174048_5_2.wav 57344
20
+ wavs_audio1521967182568_5_3.wav 57344
21
+ wavs_audio1521967191647_5_4.wav 57002
22
+ wavs_audio1521967210928_2_1.wav 50315
23
+ wavs_audio1521967218948_2_2.wav 48086
24
+ wavs_audio1521967234607_2_4.wav 47621
25
+ wavs_audio1521971805599_1_2.wav 80640
26
+ wavs_audio1521971823304_1_3.wav 48960
27
+ wavs_audio1521971839606_1_4.wav 65280
28
+ wavs_audio1521971851847_1_5.wav 60480
29
+ wavs_audio1521971860971_1_6.wav 42240
30
+ wavs_audio1521971875937_1_7.wav 69120
31
+ wavs_audio1521971898008_1_1.wav 61440
32
+ wavs_audio1521971912791_4_1.wav 45120
33
+ wavs_audio1521971921691_4_2.wav 54720
34
+ wavs_audio1521971930457_4_3.wav 45120
35
+ wavs_audio1521972707739_4_4.wav 44160
36
+ wavs_audio1521972722300_4_3.wav 36480
37
+ wavs_audio1521972738905_4_5.wav 41280
38
+ wavs_audio1521977707787_1_2.wav 53760
39
+ wavs_audio1521977720120_1_3.wav 44160
40
+ wavs_audio1521977752026_1_4.wav 50880
41
+ wavs_audio1521977773902_1_5.wav 48000
42
+ wavs_audio1521977783661_1_6.wav 48960
43
+ wavs_audio1521977799200_1_7.wav 69120
44
+ wavs_audio1521977813700_1_8.wav 58560
45
+ wavs_audio1521977891866_2_5.wav 47040
46
+ wavs_audio1521977899927_2_6.wav 50880
47
+ wavs_audio1521977907088_2_7.wav 46080
48
+ wavs_audio1521977947642_3_1.wav 43200
49
+ wavs_audio1521977957590_3_2.wav 35520
50
+ wavs_audio1521977976433_3_4.wav 36480
51
+ wavs_audio1521977987573_3_5.wav 58560
52
+ wavs_audio1521977996513_3_6.wav 46080
53
+ wavs_audio1521978015573_3_8.wav 40320
54
+ wavs_audio1521978052852_4_1.wav 45120
55
+ wavs_audio1521978062932_4_2.wav 46080
56
+ wavs_audio1521978075070_4_3.wav 48960
57
+ wavs_audio1521978082126_4_4.wav 31680
58
+ wavs_audio1521978092090_4_5.wav 46080
59
+ wavs_audio1521978127326_5_1.wav 57600
60
+ wavs_audio1521978179622_5_2.wav 61440
61
+ wavs_audio1521978202372_5_3.wav 55680
62
+ wavs_audio1521978222792_5_4.wav 61440
63
+ wavs_audio1521978235071_5_5.wav 57600
64
+ wavs_audio1521978250135_5_6.wav 62400
65
+ wavs_audio1521978326183_6_4.wav 58560
66
+ wavs_audio1521978343427_6_3.wav 54720
67
+ wavs_audio1521978355588_6_4.wav 57600
68
+ wavs_audio1521978677087_1_2.wav 86400
69
+ wavs_audio1521978712263_1_1.wav 71040
70
+ wavs_audio1521978727285_1_3.wav 48000
71
+ wavs_audio1521978740460_1_4.wav 57600
72
+ wavs_audio1521978758646_1_6.wav 37440
73
+ wavs_audio1521978773477_1_7.wav 68160
74
+ wavs_audio1521978806858_2_1.wav 48000
75
+ wavs_audio1521978814597_2_2.wav 47040
76
+ wavs_audio1521978821952_2_3.wav 43200
77
+ wavs_audio1521978845653_2_6.wav 37440
78
+ wavs_audio1521978853183_2_7.wav 45120
79
+ wavs_audio1521978869235_1_1.wav 64320
80
+ wavs_audio1521978872384_3_1.wav 38400
81
+ wavs_audio1521978888019_3_3.wav 39360
82
+ wavs_audio1521978888670_1_2.wav 50880
83
+ wavs_audio1521978908272_3_5.wav 48000
84
+ wavs_audio1521978914741_3_6.wav 50880
85
+ wavs_audio1521978924267_3_7.wav 34560
86
+ wavs_audio1521978933309_3_8.wav 35520
87
+ wavs_audio1521978940490_1_4.wav 46080
88
+ wavs_audio1521978950982_4_2.wav 43200
89
+ wavs_audio1521978958260_4_3.wav 39360
90
+ wavs_audio1521978969477_4_4.wav 39360
91
+ wavs_audio1521978973155_1_6.wav 40320
92
+ wavs_audio1521978993428_1_2.wav 72000
93
+ wavs_audio1521979014107_5_1.wav 67200
94
+ wavs_audio1521979015035_1_3.wav 58560
95
+ wavs_audio1521979027700_5_2.wav 88320
96
+ wavs_audio1521979039812_1_4.wav 73920
97
+ wavs_audio1521979050276_5_4.wav 64320
98
+ wavs_audio1521979057273_1_5.wav 69120
99
+ wavs_audio1521979062158_5_5.wav 64320
100
+ wavs_audio1521979078672_5_7.wav 53760
101
+ wavs_audio1521979090759_1_3.wav 46080
102
+ wavs_audio1521979096579_6_1.wav 51840
103
+ wavs_audio1521979102002_1_7.wav 84480
104
+ wavs_audio1521979106195_6_2.wav 55680
105
+ wavs_audio1521979106979_1_4.wav 48960
106
+ wavs_audio1521979119723_1_8.wav 95040
107
+ wavs_audio1521979120029_1_5.wav 41280
108
+ wavs_audio1521979123293_6_3.wav 57600
109
+ wavs_audio1521979151139_1_7.wav 66240
110
+ wavs_audio1521979371025_1_1.wav 64320
111
+ wavs_audio1521979395109_1_3.wav 44160
112
+ wavs_audio1521979407130_1_4.wav 53760
113
+ wavs_audio1521979418969_1_5.wav 45120
114
+ wavs_audio1521979451100_1_7.wav 77760
115
+ wavs_audio1521979471524_1_8.wav 70080
116
+ wavs_audio1521979769345_1_1.wav 68160
117
+ wavs_audio1521979802399_1_2.wav 96960
118
+ wavs_audio1521979893089_1_5.wav 83520
119
+ wavs_audio1521979928067_1_7.wav 81600
120
+ wavs_audio1521979952404_1_8.wav 75840
121
+ wavs_audio1521980004898_2_1.wav 72000
122
+ wavs_audio1521980018398_2_2.wav 51840
123
+ wavs_audio1521980039252_2_3.wav 52800
124
+ wavs_audio1521980051832_2_4.wav 55680
125
+ wavs_audio1521980067912_2_5.wav 49920
126
+ wavs_audio1521980094101_2_7.wav 53760
127
+ wavs_audio1521980170026_3_3.wav 49920
128
+ wavs_audio1521980191245_3_5.wav 43200
129
+ wavs_audio1521980205044_3_6.wav 42240
130
+ wavs_audio1521980233059_3_7.wav 89280
131
+ wavs_audio1521980244453_3_8.wav 48960
132
+ wavs_audio1521980278845_4_1.wav 46080
133
+ wavs_audio1521980295672_4_2.wav 65280
134
+ wavs_audio1521980306831_4_3.wav 48960
135
+ wavs_audio1521980313648_4_4.wav 33600
136
+ wavs_audio1521980348852_5_1.wav 70080
137
+ wavs_audio1521980361431_5_2.wav 68160
138
+ wavs_audio1521980389212_5_4.wav 69120
139
+ wavs_audio1521980398806_5_5.wav 72960
140
+ wavs_audio1521980413007_5_6.wav 82560
141
+ wavs_audio1521980461538_6_1.wav 52800
142
+ wavs_audio1521980474318_6_2.wav 72000
143
+ wavs_audio1521985594389_3_1.wav 80640
144
+ wavs_audio1521985613538_3_1.wav 90240
145
+ wavs_audio1521985626118_3_1.wav 72960
146
+ wavs_audio1521985663610_2_4.wav 82560
147
+ wavs_audio1521985688421_2_6.wav 83520
148
+ wavs_audio1521990339369_1_1.wav 57344
149
+ wavs_audio1521990360269_1_2.wav 57344
150
+ wavs_audio1521990394150_1_3.wav 57344
151
+ wavs_audio1521990423934_1_4.wav 57344
152
+ wavs_audio1521990453497_1_5.wav 51645
153
+ wavs_audio1521990475391_1_6.wav 57344
154
+ wavs_audio1521990548847_1_8.wav 57344
155
+ wavs_audio1521990572405_2_1.wav 57344
156
+ wavs_audio1521990606637_2_2.wav 57344
157
+ wavs_audio1521990622647_2_3.wav 57344
158
+ wavs_audio1521990651415_2_4.wav 57344
159
+ wavs_audio1521990713296_2_6.wav 54973
160
+ wavs_audio1521990733173_2_7.wav 57344
161
+ wavs_audio1521990844646_3_3.wav 54504
162
+ wavs_audio1521990928443_3_5.wav 57344
163
+ wavs_audio1521990948913_3_6.wav 57344
164
+ wavs_audio1521990965351_3_7.wav 52413
165
+ wavs_audio1521990987567_3_8.wav 57344
166
+ wavs_audio1521991035895_4_1.wav 45544
167
+ wavs_audio1521991054817_4_2.wav 57344
168
+ wavs_audio1521991153583_4_4.wav 45245
169
+ wavs_audio1521991191047_4_5.wav 57344
170
+ wavs_audio1521991250215_5_1.wav 57344
171
+ wavs_audio1521991440896_5_3.wav 57344
172
+ wavs_audio1521991611232_5_5.wav 57344
173
+ wavs_audio1521991643172_5_6.wav 57344
174
+ wavs_audio1521991717586_6_1.wav 57344
175
+ wavs_audio1521991757232_6_2.wav 57344
176
+ wavs_audio1521991785946_6_3.wav 57344
177
+ wavs_audio1521991849774_6_4.wav 57344
178
+ wavs_audio1521998390206_1_1.wav 48000
179
+ wavs_audio1521998665288_2_7.wav 50880
180
+ wavs_audio1521998752944_3_6.wav 59520
181
+ wavs_audio1521998843131_4_4.wav 37440
182
+ wavs_audio1521998905583_5_6.wav 51840
183
+ wavs_audio1521998964991_6_3.wav 58560
184
+ wavs_audio1522007605101_1_1.wav 53760
185
+ wavs_audio1522007643961_1_3.wav 40320
186
+ wavs_audio1522007658801_1_4.wav 40320
187
+ wavs_audio1522007670811_1_5.wav 38400
188
+ wavs_audio1522007681976_1_6.wav 42240
189
+ wavs_audio1522007696866_1_7.wav 50880
190
+ wavs_audio1522007710971_1_8.wav 49920
191
+ wavs_audio1522007735606_1_7.wav 87360
exp/asr_stats_raw_en_word/logdir/stats.1/train/stats_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ feats
2
+ feats_lengths
exp/asr_stats_raw_en_word/logdir/stats.1/train/text_shape ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1520836294077_1_1.wav 18
2
+ wavs_audio1520836461764_1_8.wav 33
3
+ wavs_audio1520837122917_1_1.wav 18
4
+ wavs_audio1520840663051_1_1.wav 18
5
+ wavs_audio1520841304669_1_1.wav 18
6
+ wavs_audio1520842275873_6_2.wav 23
7
+ wavs_audio1520843492200_2_5.wav 14
8
+ wavs_audio1520843508056_2_7.wav 20
9
+ wavs_audio1520872925842_2_2.wav 23
10
+ wavs_audio1521963566571_4_4.wav 12
11
+ wavs_audio1521963584328_4_5.wav 20
12
+ wavs_audio1521963604100_4_1.wav 18
13
+ wavs_audio1521963620973_6_3.wav 33
14
+ wavs_audio1521965219757_1_1.wav 18
15
+ wavs_audio1521967020188_1_1.wav 18
16
+ wavs_audio1521967072918_1_3.wav 9
17
+ wavs_audio1521967088867_1_4.wav 20
18
+ wavs_audio1521967143467_4_4.wav 12
19
+ wavs_audio1521967174048_5_2.wav 32
20
+ wavs_audio1521967182568_5_3.wav 29
21
+ wavs_audio1521967191647_5_4.wav 29
22
+ wavs_audio1521967210928_2_1.wav 23
23
+ wavs_audio1521967218948_2_2.wav 23
24
+ wavs_audio1521967234607_2_4.wav 21
25
+ wavs_audio1521971805599_1_2.wav 15
26
+ wavs_audio1521971823304_1_3.wav 9
27
+ wavs_audio1521971839606_1_4.wav 20
28
+ wavs_audio1521971851847_1_5.wav 17
29
+ wavs_audio1521971860971_1_6.wav 11
30
+ wavs_audio1521971875937_1_7.wav 31
31
+ wavs_audio1521971898008_1_1.wav 18
32
+ wavs_audio1521971912791_4_1.wav 18
33
+ wavs_audio1521971921691_4_2.wav 20
34
+ wavs_audio1521971930457_4_3.wav 18
35
+ wavs_audio1521972707739_4_4.wav 12
36
+ wavs_audio1521972722300_4_3.wav 18
37
+ wavs_audio1521972738905_4_5.wav 20
38
+ wavs_audio1521977707787_1_2.wav 15
39
+ wavs_audio1521977720120_1_3.wav 9
40
+ wavs_audio1521977752026_1_4.wav 20
41
+ wavs_audio1521977773902_1_5.wav 17
42
+ wavs_audio1521977783661_1_6.wav 11
43
+ wavs_audio1521977799200_1_7.wav 31
44
+ wavs_audio1521977813700_1_8.wav 33
45
+ wavs_audio1521977891866_2_5.wav 14
46
+ wavs_audio1521977899927_2_6.wav 20
47
+ wavs_audio1521977907088_2_7.wav 20
48
+ wavs_audio1521977947642_3_1.wav 15
49
+ wavs_audio1521977957590_3_2.wav 13
50
+ wavs_audio1521977976433_3_4.wav 13
51
+ wavs_audio1521977987573_3_5.wav 20
52
+ wavs_audio1521977996513_3_6.wav 20
53
+ wavs_audio1521978015573_3_8.wav 13
54
+ wavs_audio1521978052852_4_1.wav 18
55
+ wavs_audio1521978062932_4_2.wav 20
56
+ wavs_audio1521978075070_4_3.wav 18
57
+ wavs_audio1521978082126_4_4.wav 12
58
+ wavs_audio1521978092090_4_5.wav 20
59
+ wavs_audio1521978127326_5_1.wav 27
60
+ wavs_audio1521978179622_5_2.wav 32
61
+ wavs_audio1521978202372_5_3.wav 29
62
+ wavs_audio1521978222792_5_4.wav 29
63
+ wavs_audio1521978235071_5_5.wav 27
64
+ wavs_audio1521978250135_5_6.wav 27
65
+ wavs_audio1521978326183_6_4.wav 35
66
+ wavs_audio1521978343427_6_3.wav 33
67
+ wavs_audio1521978355588_6_4.wav 35
68
+ wavs_audio1521978677087_1_2.wav 15
69
+ wavs_audio1521978712263_1_1.wav 18
70
+ wavs_audio1521978727285_1_3.wav 9
71
+ wavs_audio1521978740460_1_4.wav 20
72
+ wavs_audio1521978758646_1_6.wav 11
73
+ wavs_audio1521978773477_1_7.wav 31
74
+ wavs_audio1521978806858_2_1.wav 23
75
+ wavs_audio1521978814597_2_2.wav 23
76
+ wavs_audio1521978821952_2_3.wav 21
77
+ wavs_audio1521978845653_2_6.wav 20
78
+ wavs_audio1521978853183_2_7.wav 20
79
+ wavs_audio1521978869235_1_1.wav 18
80
+ wavs_audio1521978872384_3_1.wav 15
81
+ wavs_audio1521978888019_3_3.wav 15
82
+ wavs_audio1521978888670_1_2.wav 15
83
+ wavs_audio1521978908272_3_5.wav 20
84
+ wavs_audio1521978914741_3_6.wav 20
85
+ wavs_audio1521978924267_3_7.wav 13
86
+ wavs_audio1521978933309_3_8.wav 13
87
+ wavs_audio1521978940490_1_4.wav 20
88
+ wavs_audio1521978950982_4_2.wav 20
89
+ wavs_audio1521978958260_4_3.wav 18
90
+ wavs_audio1521978969477_4_4.wav 12
91
+ wavs_audio1521978973155_1_6.wav 11
92
+ wavs_audio1521978993428_1_2.wav 15
93
+ wavs_audio1521979014107_5_1.wav 27
94
+ wavs_audio1521979015035_1_3.wav 9
95
+ wavs_audio1521979027700_5_2.wav 32
96
+ wavs_audio1521979039812_1_4.wav 20
97
+ wavs_audio1521979050276_5_4.wav 29
98
+ wavs_audio1521979057273_1_5.wav 17
99
+ wavs_audio1521979062158_5_5.wav 27
100
+ wavs_audio1521979078672_5_7.wav 29
101
+ wavs_audio1521979090759_1_3.wav 9
102
+ wavs_audio1521979096579_6_1.wav 21
103
+ wavs_audio1521979102002_1_7.wav 31
104
+ wavs_audio1521979106195_6_2.wav 23
105
+ wavs_audio1521979106979_1_4.wav 20
106
+ wavs_audio1521979119723_1_8.wav 33
107
+ wavs_audio1521979120029_1_5.wav 17
108
+ wavs_audio1521979123293_6_3.wav 33
109
+ wavs_audio1521979151139_1_7.wav 31
110
+ wavs_audio1521979371025_1_1.wav 18
111
+ wavs_audio1521979395109_1_3.wav 9
112
+ wavs_audio1521979407130_1_4.wav 20
113
+ wavs_audio1521979418969_1_5.wav 17
114
+ wavs_audio1521979451100_1_7.wav 31
115
+ wavs_audio1521979471524_1_8.wav 33
116
+ wavs_audio1521979769345_1_1.wav 18
117
+ wavs_audio1521979802399_1_2.wav 15
118
+ wavs_audio1521979893089_1_5.wav 17
119
+ wavs_audio1521979928067_1_7.wav 31
120
+ wavs_audio1521979952404_1_8.wav 33
121
+ wavs_audio1521980004898_2_1.wav 23
122
+ wavs_audio1521980018398_2_2.wav 23
123
+ wavs_audio1521980039252_2_3.wav 21
124
+ wavs_audio1521980051832_2_4.wav 21
125
+ wavs_audio1521980067912_2_5.wav 14
126
+ wavs_audio1521980094101_2_7.wav 20
127
+ wavs_audio1521980170026_3_3.wav 15
128
+ wavs_audio1521980191245_3_5.wav 20
129
+ wavs_audio1521980205044_3_6.wav 20
130
+ wavs_audio1521980233059_3_7.wav 13
131
+ wavs_audio1521980244453_3_8.wav 13
132
+ wavs_audio1521980278845_4_1.wav 18
133
+ wavs_audio1521980295672_4_2.wav 20
134
+ wavs_audio1521980306831_4_3.wav 18
135
+ wavs_audio1521980313648_4_4.wav 12
136
+ wavs_audio1521980348852_5_1.wav 27
137
+ wavs_audio1521980361431_5_2.wav 32
138
+ wavs_audio1521980389212_5_4.wav 29
139
+ wavs_audio1521980398806_5_5.wav 27
140
+ wavs_audio1521980413007_5_6.wav 27
141
+ wavs_audio1521980461538_6_1.wav 21
142
+ wavs_audio1521980474318_6_2.wav 23
143
+ wavs_audio1521985594389_3_1.wav 15
144
+ wavs_audio1521985613538_3_1.wav 15
145
+ wavs_audio1521985626118_3_1.wav 15
146
+ wavs_audio1521985663610_2_4.wav 21
147
+ wavs_audio1521985688421_2_6.wav 20
148
+ wavs_audio1521990339369_1_1.wav 18
149
+ wavs_audio1521990360269_1_2.wav 15
150
+ wavs_audio1521990394150_1_3.wav 9
151
+ wavs_audio1521990423934_1_4.wav 20
152
+ wavs_audio1521990453497_1_5.wav 17
153
+ wavs_audio1521990475391_1_6.wav 11
154
+ wavs_audio1521990548847_1_8.wav 33
155
+ wavs_audio1521990572405_2_1.wav 23
156
+ wavs_audio1521990606637_2_2.wav 23
157
+ wavs_audio1521990622647_2_3.wav 21
158
+ wavs_audio1521990651415_2_4.wav 21
159
+ wavs_audio1521990713296_2_6.wav 20
160
+ wavs_audio1521990733173_2_7.wav 20
161
+ wavs_audio1521990844646_3_3.wav 15
162
+ wavs_audio1521990928443_3_5.wav 20
163
+ wavs_audio1521990948913_3_6.wav 20
164
+ wavs_audio1521990965351_3_7.wav 13
165
+ wavs_audio1521990987567_3_8.wav 13
166
+ wavs_audio1521991035895_4_1.wav 18
167
+ wavs_audio1521991054817_4_2.wav 20
168
+ wavs_audio1521991153583_4_4.wav 12
169
+ wavs_audio1521991191047_4_5.wav 20
170
+ wavs_audio1521991250215_5_1.wav 27
171
+ wavs_audio1521991440896_5_3.wav 29
172
+ wavs_audio1521991611232_5_5.wav 27
173
+ wavs_audio1521991643172_5_6.wav 27
174
+ wavs_audio1521991717586_6_1.wav 21
175
+ wavs_audio1521991757232_6_2.wav 23
176
+ wavs_audio1521991785946_6_3.wav 33
177
+ wavs_audio1521991849774_6_4.wav 35
178
+ wavs_audio1521998390206_1_1.wav 18
179
+ wavs_audio1521998665288_2_7.wav 20
180
+ wavs_audio1521998752944_3_6.wav 20
181
+ wavs_audio1521998843131_4_4.wav 12
182
+ wavs_audio1521998905583_5_6.wav 27
183
+ wavs_audio1521998964991_6_3.wav 33
184
+ wavs_audio1522007605101_1_1.wav 18
185
+ wavs_audio1522007643961_1_3.wav 9
186
+ wavs_audio1522007658801_1_4.wav 20
187
+ wavs_audio1522007670811_1_5.wav 17
188
+ wavs_audio1522007681976_1_6.wav 11
189
+ wavs_audio1522007696866_1_7.wav 31
190
+ wavs_audio1522007710971_1_8.wav 33
191
+ wavs_audio1522007735606_1_7.wav 31
exp/asr_stats_raw_en_word/logdir/stats.1/valid/batch_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ speech
2
+ text
exp/asr_stats_raw_en_word/logdir/stats.1/valid/feats_lengths_stats.npz ADDED
Binary file (778 Bytes). View file
 
exp/asr_stats_raw_en_word/logdir/stats.1/valid/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/asr_stats_raw_en_word/logdir/stats.1/valid/speech_shape ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1521967108757_4_1.wav 48922
2
+ wavs_audio1521967226793_2_3.wav 49340
3
+ wavs_audio1521971937930_4_4.wav 38400
4
+ wavs_audio1521971946892_4_5.wav 44160
5
+ wavs_audio1521977862147_2_2.wav 53760
6
+ wavs_audio1521978750025_1_5.wav 46080
7
+ wavs_audio1521978830797_2_4.wav 44160
8
+ wavs_audio1521978837483_2_5.wav 42240
9
+ wavs_audio1521978880668_3_2.wav 37440
10
+ wavs_audio1521978977408_4_5.wav 48000
11
+ wavs_audio1521978992321_4_1.wav 47040
12
+ wavs_audio1521979002585_1_8.wav 52800
13
+ wavs_audio1521979070192_5_6.wav 57600
14
+ wavs_audio1521979075133_1_6.wav 59520
15
+ wavs_audio1521979077899_1_2.wav 49920
16
+ wavs_audio1521979132984_1_6.wav 38400
17
+ wavs_audio1521979147057_6_4.wav 65280
18
+ wavs_audio1521979384490_1_2.wav 52800
19
+ wavs_audio1521979852020_1_4.wav 101760
20
+ wavs_audio1521980432472_5_7.wav 71040
21
+ wavs_audio1521980519079_6_4.wav 70080
22
+ wavs_audio1521986099502_1_6.wav 94080
23
+ wavs_audio1521990528827_1_7.wav 57344
24
+ wavs_audio1521990793862_3_2.wav 38973
exp/asr_stats_raw_en_word/logdir/stats.1/valid/stats_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ feats
2
+ feats_lengths
exp/asr_stats_raw_en_word/logdir/stats.1/valid/text_shape ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1521967108757_4_1.wav 18
2
+ wavs_audio1521967226793_2_3.wav 21
3
+ wavs_audio1521971937930_4_4.wav 12
4
+ wavs_audio1521971946892_4_5.wav 20
5
+ wavs_audio1521977862147_2_2.wav 23
6
+ wavs_audio1521978750025_1_5.wav 17
7
+ wavs_audio1521978830797_2_4.wav 21
8
+ wavs_audio1521978837483_2_5.wav 14
9
+ wavs_audio1521978880668_3_2.wav 13
10
+ wavs_audio1521978977408_4_5.wav 20
11
+ wavs_audio1521978992321_4_1.wav 18
12
+ wavs_audio1521979002585_1_8.wav 33
13
+ wavs_audio1521979070192_5_6.wav 27
14
+ wavs_audio1521979075133_1_6.wav 11
15
+ wavs_audio1521979077899_1_2.wav 15
16
+ wavs_audio1521979132984_1_6.wav 11
17
+ wavs_audio1521979147057_6_4.wav 35
18
+ wavs_audio1521979384490_1_2.wav 15
19
+ wavs_audio1521979852020_1_4.wav 20
20
+ wavs_audio1521980432472_5_7.wav 29
21
+ wavs_audio1521980519079_6_4.wav 35
22
+ wavs_audio1521986099502_1_6.wav 11
23
+ wavs_audio1521990528827_1_7.wav 31
24
+ wavs_audio1521990793862_3_2.wav 13
exp/asr_stats_raw_en_word/logdir/stats.10.log ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Running on r099.ib.bridges2.psc.edu
2
+ # Started at Tue Dec 21 22:24:49 EST 2021
3
+ # SLURMD_NODENAME=r099
4
+ # SLURM_ARRAY_JOB_ID=5730432
5
+ # SLURM_ARRAY_TASK_COUNT=32
6
+ # SLURM_ARRAY_TASK_ID=10
7
+ # SLURM_ARRAY_TASK_MAX=32
8
+ # SLURM_ARRAY_TASK_MIN=1
9
+ # SLURM_ARRAY_TASK_STEP=1
10
+ # SLURM_CLUSTER_NAME=bridges2
11
+ # SLURM_CONF=/var/spool/slurm/d/conf-cache/slurm.conf
12
+ # SLURM_CPUS_ON_NODE=1
13
+ # SLURM_EXPORT_ENV=PATH
14
+ # SLURM_GET_USER_ENV=1
15
+ # SLURM_GTIDS=0
16
+ # SLURM_JOBID=5730463
17
+ # SLURM_JOB_ACCOUNT=cis210027p
18
+ # SLURM_JOB_CPUS_PER_NODE=1
19
+ # SLURM_JOB_GID=24886
20
+ # SLURM_JOB_ID=5730463
21
+ # SLURM_JOB_NAME=stats.sh
22
+ # SLURM_JOB_NODELIST=r099
23
+ # SLURM_JOB_NUM_NODES=1
24
+ # SLURM_JOB_PARTITION=RM-shared
25
+ # SLURM_JOB_QOS=rm
26
+ # SLURM_JOB_UID=82326
27
+ # SLURM_JOB_USER=ganesank
28
+ # SLURM_LOCALID=0
29
+ # SLURM_MEM_PER_CPU=2000
30
+ # SLURM_NNODES=1
31
+ # SLURM_NODEID=0
32
+ # SLURM_NODELIST=r099
33
+ # SLURM_NODE_ALIASES='(null)'
34
+ # SLURM_OPEN_MODE=a
35
+ # SLURM_PRIO_PROCESS=0
36
+ # SLURM_PROCID=0
37
+ # SLURM_SUBMIT_DIR=/ocean/projects/cis210027p/ganesank/karthik_new/espnet/egs2/sinhala/asr1
38
+ # SLURM_SUBMIT_HOST=br012.ib.bridges2.psc.edu
39
+ # SLURM_TASKS_PER_NODE=1
40
+ # SLURM_TASK_PID=6007
41
+ # SLURM_TOPOLOGY_ADDR=r099
42
+ # SLURM_TOPOLOGY_ADDR_PATTERN=node
43
+ # SLURM_WORKING_CLUSTER=bridges2:br003:6814:9216:109
44
+ # python3 -m espnet2.bin.asr_train --collect_stats true --use_preprocessor true --bpemodel none --token_type word --token_list data/en_token_list/word/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train/text,text,text --valid_data_path_and_name_and_type dump/raw/valid/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/valid/text,text,text --train_shape_file exp/asr_stats_raw_en_word/logdir/train.10.scp --valid_shape_file exp/asr_stats_raw_en_word/logdir/valid.10.scp --output_dir exp/asr_stats_raw_en_word/logdir/stats.10 --config conf/train_asr.yaml --frontend_conf fs=16k
45
+ /ocean/projects/cis210027p/ganesank/karthik_new/espnet/tools/venv/bin/python3 /ocean/projects/cis210027p/ganesank/karthik_new/espnet/espnet2/bin/asr_train.py --collect_stats true --use_preprocessor true --bpemodel none --token_type word --token_list data/en_token_list/word/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train/text,text,text --valid_data_path_and_name_and_type dump/raw/valid/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/valid/text,text,text --train_shape_file exp/asr_stats_raw_en_word/logdir/train.10.scp --valid_shape_file exp/asr_stats_raw_en_word/logdir/valid.10.scp --output_dir exp/asr_stats_raw_en_word/logdir/stats.10 --config conf/train_asr.yaml --frontend_conf fs=16k
46
+ [r099] 2021-12-21 22:25:08,042 (asr:382) INFO: Vocabulary size: 40
47
+ [r099] 2021-12-21 22:25:09,019 (abs_task:1132) INFO: pytorch.version=1.8.1+cu102, cuda.available=False, cudnn.version=7605, cudnn.benchmark=False, cudnn.deterministic=True
48
+ [r099] 2021-12-21 22:25:09,024 (abs_task:1133) INFO: Model structure:
49
+ ESPnetASRModel(
50
+ (frontend): DefaultFrontend(
51
+ (stft): Stft(n_fft=512, win_length=512, hop_length=128, center=True, normalized=False, onesided=True)
52
+ (frontend): Frontend()
53
+ (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
54
+ )
55
+ (specaug): SpecAug(
56
+ (time_warp): TimeWarp(window=5, mode=bicubic)
57
+ (freq_mask): MaskAlongAxis(mask_width_range=[0, 30], num_mask=2, axis=freq)
58
+ (time_mask): MaskAlongAxis(mask_width_range=[0, 40], num_mask=2, axis=time)
59
+ )
60
+ (normalize): UtteranceMVN(norm_means=True, norm_vars=False)
61
+ (encoder): TransformerEncoder(
62
+ (embed): Conv2dSubsampling(
63
+ (conv): Sequential(
64
+ (0): Conv2d(1, 256, kernel_size=(3, 3), stride=(2, 2))
65
+ (1): ReLU()
66
+ (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2))
67
+ (3): ReLU()
68
+ )
69
+ (out): Sequential(
70
+ (0): Linear(in_features=4864, out_features=256, bias=True)
71
+ (1): PositionalEncoding(
72
+ (dropout): Dropout(p=0.1, inplace=False)
73
+ )
74
+ )
75
+ )
76
+ (encoders): MultiSequential(
77
+ (0): EncoderLayer(
78
+ (self_attn): MultiHeadedAttention(
79
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
80
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
81
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
82
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
83
+ (dropout): Dropout(p=0.0, inplace=False)
84
+ )
85
+ (feed_forward): PositionwiseFeedForward(
86
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
87
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
88
+ (dropout): Dropout(p=0.1, inplace=False)
89
+ (activation): ReLU()
90
+ )
91
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
92
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
93
+ (dropout): Dropout(p=0.1, inplace=False)
94
+ )
95
+ (1): EncoderLayer(
96
+ (self_attn): MultiHeadedAttention(
97
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
98
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
99
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
100
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
101
+ (dropout): Dropout(p=0.0, inplace=False)
102
+ )
103
+ (feed_forward): PositionwiseFeedForward(
104
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
105
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
106
+ (dropout): Dropout(p=0.1, inplace=False)
107
+ (activation): ReLU()
108
+ )
109
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
110
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
111
+ (dropout): Dropout(p=0.1, inplace=False)
112
+ )
113
+ (2): EncoderLayer(
114
+ (self_attn): MultiHeadedAttention(
115
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
116
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
117
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
118
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
119
+ (dropout): Dropout(p=0.0, inplace=False)
120
+ )
121
+ (feed_forward): PositionwiseFeedForward(
122
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
123
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
124
+ (dropout): Dropout(p=0.1, inplace=False)
125
+ (activation): ReLU()
126
+ )
127
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
128
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
129
+ (dropout): Dropout(p=0.1, inplace=False)
130
+ )
131
+ (3): EncoderLayer(
132
+ (self_attn): MultiHeadedAttention(
133
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
134
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
135
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
136
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
137
+ (dropout): Dropout(p=0.0, inplace=False)
138
+ )
139
+ (feed_forward): PositionwiseFeedForward(
140
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
141
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
142
+ (dropout): Dropout(p=0.1, inplace=False)
143
+ (activation): ReLU()
144
+ )
145
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
146
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
147
+ (dropout): Dropout(p=0.1, inplace=False)
148
+ )
149
+ (4): EncoderLayer(
150
+ (self_attn): MultiHeadedAttention(
151
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
152
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
153
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
154
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
155
+ (dropout): Dropout(p=0.0, inplace=False)
156
+ )
157
+ (feed_forward): PositionwiseFeedForward(
158
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
159
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
160
+ (dropout): Dropout(p=0.1, inplace=False)
161
+ (activation): ReLU()
162
+ )
163
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
164
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
165
+ (dropout): Dropout(p=0.1, inplace=False)
166
+ )
167
+ (5): EncoderLayer(
168
+ (self_attn): MultiHeadedAttention(
169
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
170
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
171
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
172
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
173
+ (dropout): Dropout(p=0.0, inplace=False)
174
+ )
175
+ (feed_forward): PositionwiseFeedForward(
176
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
177
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
178
+ (dropout): Dropout(p=0.1, inplace=False)
179
+ (activation): ReLU()
180
+ )
181
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
182
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
183
+ (dropout): Dropout(p=0.1, inplace=False)
184
+ )
185
+ (6): EncoderLayer(
186
+ (self_attn): MultiHeadedAttention(
187
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
188
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
189
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
190
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
191
+ (dropout): Dropout(p=0.0, inplace=False)
192
+ )
193
+ (feed_forward): PositionwiseFeedForward(
194
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
195
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
196
+ (dropout): Dropout(p=0.1, inplace=False)
197
+ (activation): ReLU()
198
+ )
199
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
200
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
201
+ (dropout): Dropout(p=0.1, inplace=False)
202
+ )
203
+ (7): EncoderLayer(
204
+ (self_attn): MultiHeadedAttention(
205
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
206
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
207
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
208
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
209
+ (dropout): Dropout(p=0.0, inplace=False)
210
+ )
211
+ (feed_forward): PositionwiseFeedForward(
212
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
213
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
214
+ (dropout): Dropout(p=0.1, inplace=False)
215
+ (activation): ReLU()
216
+ )
217
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
218
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
219
+ (dropout): Dropout(p=0.1, inplace=False)
220
+ )
221
+ (8): EncoderLayer(
222
+ (self_attn): MultiHeadedAttention(
223
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
224
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
225
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
226
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
227
+ (dropout): Dropout(p=0.0, inplace=False)
228
+ )
229
+ (feed_forward): PositionwiseFeedForward(
230
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
231
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
232
+ (dropout): Dropout(p=0.1, inplace=False)
233
+ (activation): ReLU()
234
+ )
235
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
236
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
237
+ (dropout): Dropout(p=0.1, inplace=False)
238
+ )
239
+ (9): EncoderLayer(
240
+ (self_attn): MultiHeadedAttention(
241
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
242
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
243
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
244
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
245
+ (dropout): Dropout(p=0.0, inplace=False)
246
+ )
247
+ (feed_forward): PositionwiseFeedForward(
248
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
249
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
250
+ (dropout): Dropout(p=0.1, inplace=False)
251
+ (activation): ReLU()
252
+ )
253
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
254
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
255
+ (dropout): Dropout(p=0.1, inplace=False)
256
+ )
257
+ (10): EncoderLayer(
258
+ (self_attn): MultiHeadedAttention(
259
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
260
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
261
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
262
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
263
+ (dropout): Dropout(p=0.0, inplace=False)
264
+ )
265
+ (feed_forward): PositionwiseFeedForward(
266
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
267
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
268
+ (dropout): Dropout(p=0.1, inplace=False)
269
+ (activation): ReLU()
270
+ )
271
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
272
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
273
+ (dropout): Dropout(p=0.1, inplace=False)
274
+ )
275
+ (11): EncoderLayer(
276
+ (self_attn): MultiHeadedAttention(
277
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
278
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
279
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
280
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
281
+ (dropout): Dropout(p=0.0, inplace=False)
282
+ )
283
+ (feed_forward): PositionwiseFeedForward(
284
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
285
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
286
+ (dropout): Dropout(p=0.1, inplace=False)
287
+ (activation): ReLU()
288
+ )
289
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
290
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
291
+ (dropout): Dropout(p=0.1, inplace=False)
292
+ )
293
+ )
294
+ (after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
295
+ )
296
+ (decoder): TransformerDecoder(
297
+ (embed): Sequential(
298
+ (0): Embedding(40, 256)
299
+ (1): PositionalEncoding(
300
+ (dropout): Dropout(p=0.1, inplace=False)
301
+ )
302
+ )
303
+ (after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
304
+ (output_layer): Linear(in_features=256, out_features=40, bias=True)
305
+ (decoders): MultiSequential(
306
+ (0): DecoderLayer(
307
+ (self_attn): MultiHeadedAttention(
308
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
309
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
310
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
311
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
312
+ (dropout): Dropout(p=0.0, inplace=False)
313
+ )
314
+ (src_attn): MultiHeadedAttention(
315
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
316
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
317
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
318
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
319
+ (dropout): Dropout(p=0.0, inplace=False)
320
+ )
321
+ (feed_forward): PositionwiseFeedForward(
322
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
323
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
324
+ (dropout): Dropout(p=0.1, inplace=False)
325
+ (activation): ReLU()
326
+ )
327
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
328
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
329
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
330
+ (dropout): Dropout(p=0.1, inplace=False)
331
+ )
332
+ (1): DecoderLayer(
333
+ (self_attn): MultiHeadedAttention(
334
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
335
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
336
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
337
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
338
+ (dropout): Dropout(p=0.0, inplace=False)
339
+ )
340
+ (src_attn): MultiHeadedAttention(
341
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
342
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
343
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
344
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
345
+ (dropout): Dropout(p=0.0, inplace=False)
346
+ )
347
+ (feed_forward): PositionwiseFeedForward(
348
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
349
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
350
+ (dropout): Dropout(p=0.1, inplace=False)
351
+ (activation): ReLU()
352
+ )
353
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
354
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
355
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
356
+ (dropout): Dropout(p=0.1, inplace=False)
357
+ )
358
+ (2): DecoderLayer(
359
+ (self_attn): MultiHeadedAttention(
360
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
361
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
362
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
363
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
364
+ (dropout): Dropout(p=0.0, inplace=False)
365
+ )
366
+ (src_attn): MultiHeadedAttention(
367
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
368
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
369
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
370
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
371
+ (dropout): Dropout(p=0.0, inplace=False)
372
+ )
373
+ (feed_forward): PositionwiseFeedForward(
374
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
375
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
376
+ (dropout): Dropout(p=0.1, inplace=False)
377
+ (activation): ReLU()
378
+ )
379
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
380
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
381
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
382
+ (dropout): Dropout(p=0.1, inplace=False)
383
+ )
384
+ (3): DecoderLayer(
385
+ (self_attn): MultiHeadedAttention(
386
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
387
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
388
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
389
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
390
+ (dropout): Dropout(p=0.0, inplace=False)
391
+ )
392
+ (src_attn): MultiHeadedAttention(
393
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
394
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
395
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
396
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
397
+ (dropout): Dropout(p=0.0, inplace=False)
398
+ )
399
+ (feed_forward): PositionwiseFeedForward(
400
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
401
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
402
+ (dropout): Dropout(p=0.1, inplace=False)
403
+ (activation): ReLU()
404
+ )
405
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
406
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
407
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
408
+ (dropout): Dropout(p=0.1, inplace=False)
409
+ )
410
+ (4): DecoderLayer(
411
+ (self_attn): MultiHeadedAttention(
412
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
413
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
414
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
415
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
416
+ (dropout): Dropout(p=0.0, inplace=False)
417
+ )
418
+ (src_attn): MultiHeadedAttention(
419
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
420
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
421
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
422
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
423
+ (dropout): Dropout(p=0.0, inplace=False)
424
+ )
425
+ (feed_forward): PositionwiseFeedForward(
426
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
427
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
428
+ (dropout): Dropout(p=0.1, inplace=False)
429
+ (activation): ReLU()
430
+ )
431
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
432
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
433
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
434
+ (dropout): Dropout(p=0.1, inplace=False)
435
+ )
436
+ (5): DecoderLayer(
437
+ (self_attn): MultiHeadedAttention(
438
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
439
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
440
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
441
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
442
+ (dropout): Dropout(p=0.0, inplace=False)
443
+ )
444
+ (src_attn): MultiHeadedAttention(
445
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
446
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
447
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
448
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
449
+ (dropout): Dropout(p=0.0, inplace=False)
450
+ )
451
+ (feed_forward): PositionwiseFeedForward(
452
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
453
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
454
+ (dropout): Dropout(p=0.1, inplace=False)
455
+ (activation): ReLU()
456
+ )
457
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
458
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
459
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
460
+ (dropout): Dropout(p=0.1, inplace=False)
461
+ )
462
+ )
463
+ )
464
+ (ctc): CTC(
465
+ (ctc_lo): Linear(in_features=256, out_features=40, bias=True)
466
+ (ctc_loss): CTCLoss()
467
+ )
468
+ (criterion_att): LabelSmoothingLoss(
469
+ (criterion): KLDivLoss()
470
+ )
471
+ )
472
+
473
+ Model summary:
474
+ Class Name: ESPnetASRModel
475
+ Total Number of model parameters: 27.12 M
476
+ Number of trainable parameters: 27.12 M (100.0%)
477
+ Size: 108.49 MB
478
+ Type: torch.float32
479
+ [r099] 2021-12-21 22:25:09,024 (abs_task:1136) INFO: Optimizer:
480
+ Adam (
481
+ Parameter Group 0
482
+ amsgrad: False
483
+ betas: (0.9, 0.999)
484
+ eps: 1e-08
485
+ initial_lr: 0.0002
486
+ lr: 8e-09
487
+ weight_decay: 0
488
+ )
489
+ [r099] 2021-12-21 22:25:09,024 (abs_task:1137) INFO: Scheduler: WarmupLR(warmup_steps=25000)
490
+ [r099] 2021-12-21 22:25:09,027 (abs_task:1146) INFO: Saving the configuration in exp/asr_stats_raw_en_word/logdir/stats.10/config.yaml
491
+ [r099] 2021-12-21 22:25:09,037 (abs_task:1157) INFO: Namespace(config='conf/train_asr.yaml', print_config=False, log_level='INFO', dry_run=False, iterator_type='sequence', output_dir='exp/asr_stats_raw_en_word/logdir/stats.10', ngpu=0, seed=0, num_workers=1, num_att_plot=3, dist_backend='nccl', dist_init_method='env://', dist_world_size=None, dist_rank=None, local_rank=None, dist_master_addr=None, dist_master_port=None, dist_launcher=None, multiprocessing_distributed=False, unused_parameters=False, sharded_ddp=False, cudnn_enabled=True, cudnn_benchmark=False, cudnn_deterministic=True, collect_stats=True, write_collected_feats=False, max_epoch=50, patience=None, val_scheduler_criterion=('valid', 'loss'), early_stopping_criterion=('valid', 'loss', 'min'), best_model_criterion=[('train', 'loss', 'min'), ('valid', 'loss', 'min'), ('train', 'acc', 'max'), ('valid', 'acc', 'max')], keep_nbest_models=5, grad_clip=5.0, grad_clip_type=2.0, grad_noise=False, accum_grad=1, no_forward_run=False, resume=False, train_dtype='float32', use_amp=False, log_interval=None, use_tensorboard=True, use_wandb=False, wandb_project=None, wandb_id=None, wandb_entity=None, wandb_name=None, wandb_model_log_interval=-1, detect_anomaly=False, pretrain_path=None, init_param=[], ignore_init_mismatch=False, freeze_param=[], num_iters_per_epoch=None, batch_size=20, valid_batch_size=None, batch_bins=1000000, valid_batch_bins=None, train_shape_file=['exp/asr_stats_raw_en_word/logdir/train.10.scp'], valid_shape_file=['exp/asr_stats_raw_en_word/logdir/valid.10.scp'], batch_type='folded', valid_batch_type=None, fold_length=[], sort_in_batch='descending', sort_batch='descending', multiple_iterator=False, chunk_length=500, chunk_shift_ratio=0.5, num_cache_chunks=1024, train_data_path_and_name_and_type=[('dump/raw/train/wav.scp', 'speech', 'sound'), ('dump/raw/train/text', 'text', 'text')], valid_data_path_and_name_and_type=[('dump/raw/valid/wav.scp', 'speech', 'sound'), ('dump/raw/valid/text', 'text', 
'text')], allow_variable_data_keys=False, max_cache_size=0.0, max_cache_fd=32, valid_max_cache_size=None, optim='adam', optim_conf={'lr': 0.0002}, scheduler='warmuplr', scheduler_conf={'warmup_steps': 25000}, token_list=['<blank>', '<unk>', '්', 'න', 'ම', 'ක', 'ල', 'ි', 'ු', 'ග', 'ේ', 'ර', 'ත', 'ද', 'ව', 'ට', 'ඕ', 'ී', 'ප', 'ය', 'ෙ', 'ස', 'ණ', 'ා', 'ැ', 'RequestAcc.balance', 'Moneywithdraw', 'Moneydeposit', 'Moneytransfer', 'Billpayments', 'බ', 'ඉ', 'ශ', 'ෂ', 'ඩ', 'Creditcardpayments', 'එ', '\u200d', 'හ', '<sos/eos>'], init=None, input_size=None, ctc_conf={'dropout_rate': 0.0, 'ctc_type': 'builtin', 'reduce': True, 'ignore_nan_grad': True}, model_conf={'ctc_weight': 0.5, 'ignore_id': -1, 'lsm_weight': 0.0, 'length_normalized_loss': False, 'report_cer': True, 'report_wer': True, 'sym_space': '<space>', 'sym_blank': '<blank>', 'extract_feats_in_collect_stats': True}, use_preprocessor=True, token_type='word', bpemodel=None, non_linguistic_symbols=None, cleaner=None, g2p=None, speech_volume_normalize=None, rir_scp=None, rir_apply_prob=1.0, noise_scp=None, noise_apply_prob=1.0, noise_db_range='13_15', frontend='default', frontend_conf={'fs': '16k'}, specaug='specaug', specaug_conf={'apply_time_warp': True, 'time_warp_window': 5, 'time_warp_mode': 'bicubic', 'apply_freq_mask': True, 'freq_mask_width_range': [0, 30], 'num_freq_mask': 2, 'apply_time_mask': True, 'time_mask_width_range': [0, 40], 'num_time_mask': 2}, normalize='utterance_mvn', normalize_conf={}, preencoder=None, preencoder_conf={}, encoder='transformer', encoder_conf={'output_size': 256, 'attention_heads': 4, 'linear_units': 2048, 'num_blocks': 12, 'dropout_rate': 0.1, 'positional_dropout_rate': 0.1, 'attention_dropout_rate': 0.0, 'input_layer': 'conv2d', 'normalize_before': True}, postencoder=None, postencoder_conf={}, decoder='transformer', decoder_conf={'attention_heads': 4, 'linear_units': 2048, 'num_blocks': 6, 'dropout_rate': 0.1, 'positional_dropout_rate': 0.1, 'self_attention_dropout_rate': 0.0, 
'src_attention_dropout_rate': 0.0}, required=['output_dir', 'token_list'], version='0.10.3a3', distributed=False)
492
+ # Accounting: begin_time=1640143489
493
+ # Accounting: end_time=1640143519
494
+ # Accounting: time=30 threads=1
495
+ # Finished at Tue Dec 21 22:25:19 EST 2021 with status 0
exp/asr_stats_raw_en_word/logdir/stats.10/config.yaml ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_stats_raw_en_word/logdir/stats.10
7
+ ngpu: 0
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: null
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: true
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - train
38
+ - loss
39
+ - min
40
+ - - valid
41
+ - loss
42
+ - min
43
+ - - train
44
+ - acc
45
+ - max
46
+ - - valid
47
+ - acc
48
+ - max
49
+ keep_nbest_models: 5
50
+ grad_clip: 5.0
51
+ grad_clip_type: 2.0
52
+ grad_noise: false
53
+ accum_grad: 1
54
+ no_forward_run: false
55
+ resume: false
56
+ train_dtype: float32
57
+ use_amp: false
58
+ log_interval: null
59
+ use_tensorboard: true
60
+ use_wandb: false
61
+ wandb_project: null
62
+ wandb_id: null
63
+ wandb_entity: null
64
+ wandb_name: null
65
+ wandb_model_log_interval: -1
66
+ detect_anomaly: false
67
+ pretrain_path: null
68
+ init_param: []
69
+ ignore_init_mismatch: false
70
+ freeze_param: []
71
+ num_iters_per_epoch: null
72
+ batch_size: 20
73
+ valid_batch_size: null
74
+ batch_bins: 1000000
75
+ valid_batch_bins: null
76
+ train_shape_file:
77
+ - exp/asr_stats_raw_en_word/logdir/train.10.scp
78
+ valid_shape_file:
79
+ - exp/asr_stats_raw_en_word/logdir/valid.10.scp
80
+ batch_type: folded
81
+ valid_batch_type: null
82
+ fold_length: []
83
+ sort_in_batch: descending
84
+ sort_batch: descending
85
+ multiple_iterator: false
86
+ chunk_length: 500
87
+ chunk_shift_ratio: 0.5
88
+ num_cache_chunks: 1024
89
+ train_data_path_and_name_and_type:
90
+ - - dump/raw/train/wav.scp
91
+ - speech
92
+ - sound
93
+ - - dump/raw/train/text
94
+ - text
95
+ - text
96
+ valid_data_path_and_name_and_type:
97
+ - - dump/raw/valid/wav.scp
98
+ - speech
99
+ - sound
100
+ - - dump/raw/valid/text
101
+ - text
102
+ - text
103
+ allow_variable_data_keys: false
104
+ max_cache_size: 0.0
105
+ max_cache_fd: 32
106
+ valid_max_cache_size: null
107
+ optim: adam
108
+ optim_conf:
109
+ lr: 0.0002
110
+ scheduler: warmuplr
111
+ scheduler_conf:
112
+ warmup_steps: 25000
113
+ token_list:
114
+ - <blank>
115
+ - <unk>
116
+ - ්
117
+ - න
118
+ - ම
119
+ - ක
120
+ - ල
121
+ - ි
122
+ - ු
123
+ - ග
124
+ - ේ
125
+ - ර
126
+ - ත
127
+ - ද
128
+ - ව
129
+ - ට
130
+ - ඕ
131
+ - ී
132
+ - ප
133
+ - ය
134
+ - ෙ
135
+ - ස
136
+ - ණ
137
+ - ා
138
+ - ැ
139
+ - RequestAcc.balance
140
+ - Moneywithdraw
141
+ - Moneydeposit
142
+ - Moneytransfer
143
+ - Billpayments
144
+ - බ
145
+ - ඉ
146
+ - ශ
147
+ - ෂ
148
+ - ඩ
149
+ - Creditcardpayments
150
+ - එ
151
+ - ‍
152
+ - හ
153
+ - <sos/eos>
154
+ init: null
155
+ input_size: null
156
+ ctc_conf:
157
+ dropout_rate: 0.0
158
+ ctc_type: builtin
159
+ reduce: true
160
+ ignore_nan_grad: true
161
+ model_conf:
162
+ ctc_weight: 0.5
163
+ ignore_id: -1
164
+ lsm_weight: 0.0
165
+ length_normalized_loss: false
166
+ report_cer: true
167
+ report_wer: true
168
+ sym_space: <space>
169
+ sym_blank: <blank>
170
+ extract_feats_in_collect_stats: true
171
+ use_preprocessor: true
172
+ token_type: word
173
+ bpemodel: null
174
+ non_linguistic_symbols: null
175
+ cleaner: null
176
+ g2p: null
177
+ speech_volume_normalize: null
178
+ rir_scp: null
179
+ rir_apply_prob: 1.0
180
+ noise_scp: null
181
+ noise_apply_prob: 1.0
182
+ noise_db_range: '13_15'
183
+ frontend: default
184
+ frontend_conf:
185
+ fs: 16k
186
+ specaug: specaug
187
+ specaug_conf:
188
+ apply_time_warp: true
189
+ time_warp_window: 5
190
+ time_warp_mode: bicubic
191
+ apply_freq_mask: true
192
+ freq_mask_width_range:
193
+ - 0
194
+ - 30
195
+ num_freq_mask: 2
196
+ apply_time_mask: true
197
+ time_mask_width_range:
198
+ - 0
199
+ - 40
200
+ num_time_mask: 2
201
+ normalize: utterance_mvn
202
+ normalize_conf: {}
203
+ preencoder: null
204
+ preencoder_conf: {}
205
+ encoder: transformer
206
+ encoder_conf:
207
+ output_size: 256
208
+ attention_heads: 4
209
+ linear_units: 2048
210
+ num_blocks: 12
211
+ dropout_rate: 0.1
212
+ positional_dropout_rate: 0.1
213
+ attention_dropout_rate: 0.0
214
+ input_layer: conv2d
215
+ normalize_before: true
216
+ postencoder: null
217
+ postencoder_conf: {}
218
+ decoder: transformer
219
+ decoder_conf:
220
+ attention_heads: 4
221
+ linear_units: 2048
222
+ num_blocks: 6
223
+ dropout_rate: 0.1
224
+ positional_dropout_rate: 0.1
225
+ self_attention_dropout_rate: 0.0
226
+ src_attention_dropout_rate: 0.0
227
+ required:
228
+ - output_dir
229
+ - token_list
230
+ version: 0.10.3a3
231
+ distributed: false
exp/asr_stats_raw_en_word/logdir/stats.10/train/batch_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ speech
2
+ text
exp/asr_stats_raw_en_word/logdir/stats.10/train/feats_lengths_stats.npz ADDED
Binary file (778 Bytes). View file
 
exp/asr_stats_raw_en_word/logdir/stats.10/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/asr_stats_raw_en_word/logdir/stats.10/train/speech_shape ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1526983626238_6_1.wav 66240
2
+ wavs_audio1526983636871_5_2.wav 51840
3
+ wavs_audio1526983641501_5_3.wav 51840
4
+ wavs_audio1526983643559_6_2.wav 60480
5
+ wavs_audio1526983652301_1_1.wav 56640
6
+ wavs_audio1526983653669_5_1.wav 56640
7
+ wavs_audio1526983663015_5_3.wav 53760
8
+ wavs_audio1526983668420_1_2.wav 42240
9
+ wavs_audio1526983671576_5_3.wav 51840
10
+ wavs_audio1526983674709_5_5.wav 47040
11
+ wavs_audio1526983682930_1_3.wav 39360
12
+ wavs_audio1526983683850_5_6.wav 53760
13
+ wavs_audio1526983684511_5_4.wav 51840
14
+ wavs_audio1526983687446_6_4.wav 61440
15
+ wavs_audio1526983694130_5_7.wav 59520
16
+ wavs_audio1526983695453_1_4.wav 42240
17
+ wavs_audio1526983711192_1_5.wav 44160
18
+ wavs_audio1526983718405_5_5.wav 57600
19
+ wavs_audio1526983722788_6_2.wav 50880
20
+ wavs_audio1526983726631_5_6.wav 49920
21
+ wavs_audio1526983730520_6_3.wav 51840
22
+ wavs_audio1526983732202_1_6.wav 41280
23
+ wavs_audio1526983741838_6_4.wav 57600
24
+ wavs_audio1526983744462_5_7.wav 51840
25
+ wavs_audio1526983759496_6_1.wav 48000
26
+ wavs_audio1526983771215_6_2.wav 45120
27
+ wavs_audio1526983778361_1_1.wav 72000
28
+ wavs_audio1526983780570_6_3.wav 44160
29
+ wavs_audio1526983788348_6_4.wav 52800
30
+ wavs_audio1526983790175_2_4.wav 54720
31
+ wavs_audio1526983800150_1_2.wav 55680
32
+ wavs_audio1526983802451_2_5.wav 48000
33
+ wavs_audio1526983812921_1_3.wav 44160
34
+ wavs_audio1526983815268_2_6.wav 52800
35
+ wavs_audio1526983824560_2_7.wav 53760
36
+ wavs_audio1526983825156_1_4.wav 45120
37
+ wavs_audio1526983850722_1_5.wav 48960
38
+ wavs_audio1526983866391_1_6.wav 44160
39
+ wavs_audio1526983882309_1_7.wav 60480
40
+ wavs_audio1526983886714_3_1.wav 48000
41
+ wavs_audio1526983895919_1_8.wav 54720
42
+ wavs_audio1526983898472_3_2.wav 30720
43
+ wavs_audio1526983911045_3_3.wav 41280
44
+ wavs_audio1526983930113_3_4.wav 44160
45
+ wavs_audio1526983931076_2_1.wav 54720
46
+ wavs_audio1526983945675_2_2.wav 52800
47
+ wavs_audio1526983972052_3_6.wav 59520
48
+ wavs_audio1526983981691_1_3.wav 52800
49
+ wavs_audio1526983990965_1_2.wav 57600
50
+ wavs_audio1526983992330_3_7.wav 46080
51
+ wavs_audio1526983996751_2_4.wav 52800
52
+ wavs_audio1526983997319_1_2.wav 66240
53
+ wavs_audio1526984003559_3_8.wav 39360
54
+ wavs_audio1526984007551_2_5.wav 35520
55
+ wavs_audio1526984011260_1_3.wav 58560
56
+ wavs_audio1526984017034_2_6.wav 34560
57
+ wavs_audio1526984027086_1_4.wav 59520
58
+ wavs_audio1526984029175_4_1.wav 45120
59
+ wavs_audio1526984031089_1_4.wav 68160
60
+ wavs_audio1526984031785_1_1.wav 70080
61
+ wavs_audio1526984036073_2_7.wav 45120
62
+ wavs_audio1526984040829_1_5.wav 49920
63
+ wavs_audio1526984044743_4_2.wav 48000
64
+ wavs_audio1526984055641_4_3.wav 40320
65
+ wavs_audio1526984071399_4_4.wav 44160
66
+ wavs_audio1526984082880_4_5.wav 57600
67
+ wavs_audio1526984083121_1_8.wav 72000
68
+ wavs_audio1526984083509_1_7.wav 72960
69
+ wavs_audio1526984106151_3_2.wav 37440
70
+ wavs_audio1526984114132_5_1.wav 69120
71
+ wavs_audio1526984121674_3_3.wav 38400
72
+ wavs_audio1526984123594_2_4.wav 50880
73
+ wavs_audio1526984135088_2_5.wav 29760
74
+ wavs_audio1526984139552_3_4.wav 31680
75
+ wavs_audio1526984144689_2_6.wav 50880
76
+ wavs_audio1526984147192_1_1.wav 41280
77
+ wavs_audio1526984148728_2_4.wav 72960
78
+ wavs_audio1526984153209_2_7.wav 49920
79
+ wavs_audio1526984156770_3_5.wav 42240
80
+ wavs_audio1526984169153_3_6.wav 52800
81
+ wavs_audio1526984171836_2_1.wav 55680
82
+ wavs_audio1526984173406_2_5.wav 46080
83
+ wavs_audio1526984173931_5_3.wav 70080
84
+ wavs_audio1526984179972_3_1.wav 37440
85
+ wavs_audio1526984186071_3_7.wav 39360
86
+ wavs_audio1526984186573_2_2.wav 59520
87
+ wavs_audio1526984197610_2_3.wav 57600
88
+ wavs_audio1526984200561_3_2.wav 41280
89
+ wavs_audio1526984206448_5_5.wav 65280
90
+ wavs_audio1526984214431_3_3.wav 45120
91
+ wavs_audio1526984217633_3_8.wav 37440
92
+ wavs_audio1526984223567_1_1.wav 48960
93
+ wavs_audio1526984236297_1_2.wav 43200
94
+ wavs_audio1526984237031_3_4.wav 56640
95
+ wavs_audio1526984241114_5_7.wav 68160
96
+ wavs_audio1526984243264_1_4.wav 63360
97
+ wavs_audio1526984245216_2_2.wav 72960
98
+ wavs_audio1526984248224_1_3.wav 45120
99
+ wavs_audio1526984249624_3_5.wav 47040
100
+ wavs_audio1526984255903_2_5.wav 48960
101
+ wavs_audio1526984256425_3_6.wav 42240
102
+ wavs_audio1526984262616_2_2.wav 72000
103
+ wavs_audio1526984265407_1_4.wav 55680
104
+ wavs_audio1526984281593_6_1.wav 66240
105
+ wavs_audio1526984282940_1_6.wav 56640
106
+ wavs_audio1526984295750_2_7.wav 57600
107
+ wavs_audio1526984296338_4_1.wav 83520
108
+ wavs_audio1526984301655_1_6.wav 37440
109
+ wavs_audio1526984307908_1_7.wav 83520
110
+ wavs_audio1526984311940_3_2.wav 51840
111
+ wavs_audio1526984318859_4_2.wav 43200
112
+ wavs_audio1526984321249_3_1.wav 100800
113
+ wavs_audio1526984323422_5_1.wav 86400
114
+ wavs_audio1526984323982_4_1.wav 36480
115
+ wavs_audio1526984326235_1_7.wav 66240
116
+ wavs_audio1526984327222_4_3.wav 60480
117
+ wavs_audio1526984333916_4_4.wav 32640
118
+ wavs_audio1526984334159_4_2.wav 44160
119
+ wavs_audio1526984337198_1_8.wav 65280
120
+ wavs_audio1526984337797_6_3.wav 65280
121
+ wavs_audio1526984345066_4_5.wav 66240
122
+ wavs_audio1526984345363_4_3.wav 38400
123
+ wavs_audio1526984349176_6_3.wav 73920
124
+ wavs_audio1526984351152_3_3.wav 44160
125
+ wavs_audio1526984354681_4_4.wav 29760
126
+ wavs_audio1526984354872_2_4.wav 43200
127
+ wavs_audio1526984357564_6_4.wav 77760
128
+ wavs_audio1526984363716_4_5.wav 35520
129
+ wavs_audio1526984365423_2_1.wav 47040
130
+ wavs_audio1526984372167_3_4.wav 43200
131
+ wavs_audio1526984375689_2_2.wav 50880
132
+ wavs_audio1526984377956_2_1.wav 84480
133
+ wavs_audio1526984381963_5_1.wav 49920
134
+ wavs_audio1526984384912_3_5.wav 53760
135
+ wavs_audio1526984392258_2_3.wav 47040
136
+ wavs_audio1526984400068_6_4.wav 76800
137
+ wavs_audio1526984402818_5_2.wav 51840
138
+ wavs_audio1526984404232_5_2.wav 67200
139
+ wavs_audio1526984407407_3_6.wav 83520
140
+ wavs_audio1526984414330_2_6.wav 53760
141
+ wavs_audio1526984416571_5_3.wav 51840
142
+ wavs_audio1526984417944_2_2.wav 62400
143
+ wavs_audio1526984421235_3_7.wav 39360
144
+ wavs_audio1526984422523_2_7.wav 49920
145
+ wavs_audio1526984426143_5_4.wav 48960
146
+ wavs_audio1526984433191_2_3.wav 53760
147
+ wavs_audio1526984435465_5_5.wav 56640
148
+ wavs_audio1526984443372_5_6.wav 51840
149
+ wavs_audio1526984444218_5_5.wav 47040
150
+ wavs_audio1526984446328_3_8.wav 64320
151
+ wavs_audio1526984455383_2_4.wav 51840
152
+ wavs_audio1526984456002_3_7.wav 50880
153
+ wavs_audio1526984456727_3_2.wav 34560
154
+ wavs_audio1526984460345_5_6.wav 46080
155
+ wavs_audio1526984469365_3_8.wav 53760
156
+ wavs_audio1526984482150_3_3.wav 42240
157
+ wavs_audio1526984485261_2_5.wav 45120
158
+ wavs_audio1526984487880_4_2.wav 53760
159
+ wavs_audio1526984492899_4_1.wav 41280
160
+ wavs_audio1526984497103_5_7.wav 81600
161
+ wavs_audio1526984500341_2_6.wav 54720
162
+ wavs_audio1526984502455_6_1.wav 35520
163
+ wavs_audio1526984504361_3_5.wav 44160
164
+ wavs_audio1526984504722_4_4.wav 39360
165
+ wavs_audio1526984506678_4_2.wav 58560
166
+ wavs_audio1526984512402_6_2.wav 45120
167
+ wavs_audio1526984513900_2_7.wav 48000
168
+ wavs_audio1526984518655_4_1.wav 53760
169
+ wavs_audio1526984523862_6_3.wav 44160
170
+ wavs_audio1526984525492_4_3.wav 72000
171
+ wavs_audio1526984528468_4_2.wav 49920
172
+ wavs_audio1526984533901_6_4.wav 47040
173
+ wavs_audio1526984535229_3_1.wav 44160
174
+ wavs_audio1526984536286_4_3.wav 39360
175
+ wavs_audio1526984538200_5_1.wav 64320
176
+ wavs_audio1526984540680_4_4.wav 40320
177
+ wavs_audio1526984546639_4_4.wav 40320
178
+ wavs_audio1526984547590_5_2.wav 61440
179
+ wavs_audio1526984552838_6_3.wav 64320
180
+ wavs_audio1526984556510_4_5.wav 63360
181
+ wavs_audio1526984559307_3_3.wav 43200
182
+ wavs_audio1526984569143_5_1.wav 48000
183
+ wavs_audio1526984570131_5_3.wav 60480
184
+ wavs_audio1526984573839_6_4.wav 100800
185
+ wavs_audio1526984575227_3_4.wav 52800
186
+ wavs_audio1526984577387_5_1.wav 58560
187
+ wavs_audio1526984579412_5_2.wav 54720
188
+ wavs_audio1526984586922_5_3.wav 46080
189
+ wavs_audio1526984588951_3_5.wav 60480
190
+ wavs_audio1526984591785_5_2.wav 65280
191
+ wavs_audio1526984595756_5_4.wav 42240
exp/asr_stats_raw_en_word/logdir/stats.10/train/stats_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ feats
2
+ feats_lengths
exp/asr_stats_raw_en_word/logdir/stats.10/train/text_shape ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1526983626238_6_1.wav 21
2
+ wavs_audio1526983636871_5_2.wav 32
3
+ wavs_audio1526983641501_5_3.wav 29
4
+ wavs_audio1526983643559_6_2.wav 23
5
+ wavs_audio1526983652301_1_1.wav 18
6
+ wavs_audio1526983653669_5_1.wav 27
7
+ wavs_audio1526983663015_5_3.wav 29
8
+ wavs_audio1526983668420_1_2.wav 15
9
+ wavs_audio1526983671576_5_3.wav 29
10
+ wavs_audio1526983674709_5_5.wav 27
11
+ wavs_audio1526983682930_1_3.wav 9
12
+ wavs_audio1526983683850_5_6.wav 27
13
+ wavs_audio1526983684511_5_4.wav 29
14
+ wavs_audio1526983687446_6_4.wav 35
15
+ wavs_audio1526983694130_5_7.wav 29
16
+ wavs_audio1526983695453_1_4.wav 20
17
+ wavs_audio1526983711192_1_5.wav 17
18
+ wavs_audio1526983718405_5_5.wav 27
19
+ wavs_audio1526983722788_6_2.wav 23
20
+ wavs_audio1526983726631_5_6.wav 27
21
+ wavs_audio1526983730520_6_3.wav 33
22
+ wavs_audio1526983732202_1_6.wav 11
23
+ wavs_audio1526983741838_6_4.wav 35
24
+ wavs_audio1526983744462_5_7.wav 29
25
+ wavs_audio1526983759496_6_1.wav 21
26
+ wavs_audio1526983771215_6_2.wav 23
27
+ wavs_audio1526983778361_1_1.wav 18
28
+ wavs_audio1526983780570_6_3.wav 33
29
+ wavs_audio1526983788348_6_4.wav 35
30
+ wavs_audio1526983790175_2_4.wav 21
31
+ wavs_audio1526983800150_1_2.wav 15
32
+ wavs_audio1526983802451_2_5.wav 14
33
+ wavs_audio1526983812921_1_3.wav 9
34
+ wavs_audio1526983815268_2_6.wav 20
35
+ wavs_audio1526983824560_2_7.wav 20
36
+ wavs_audio1526983825156_1_4.wav 20
37
+ wavs_audio1526983850722_1_5.wav 17
38
+ wavs_audio1526983866391_1_6.wav 11
39
+ wavs_audio1526983882309_1_7.wav 31
40
+ wavs_audio1526983886714_3_1.wav 15
41
+ wavs_audio1526983895919_1_8.wav 33
42
+ wavs_audio1526983898472_3_2.wav 13
43
+ wavs_audio1526983911045_3_3.wav 15
44
+ wavs_audio1526983930113_3_4.wav 13
45
+ wavs_audio1526983931076_2_1.wav 23
46
+ wavs_audio1526983945675_2_2.wav 23
47
+ wavs_audio1526983972052_3_6.wav 20
48
+ wavs_audio1526983981691_1_3.wav 9
49
+ wavs_audio1526983990965_1_2.wav 15
50
+ wavs_audio1526983992330_3_7.wav 13
51
+ wavs_audio1526983996751_2_4.wav 21
52
+ wavs_audio1526983997319_1_2.wav 15
53
+ wavs_audio1526984003559_3_8.wav 13
54
+ wavs_audio1526984007551_2_5.wav 14
55
+ wavs_audio1526984011260_1_3.wav 9
56
+ wavs_audio1526984017034_2_6.wav 20
57
+ wavs_audio1526984027086_1_4.wav 20
58
+ wavs_audio1526984029175_4_1.wav 18
59
+ wavs_audio1526984031089_1_4.wav 20
60
+ wavs_audio1526984031785_1_1.wav 18
61
+ wavs_audio1526984036073_2_7.wav 20
62
+ wavs_audio1526984040829_1_5.wav 17
63
+ wavs_audio1526984044743_4_2.wav 20
64
+ wavs_audio1526984055641_4_3.wav 18
65
+ wavs_audio1526984071399_4_4.wav 12
66
+ wavs_audio1526984082880_4_5.wav 20
67
+ wavs_audio1526984083121_1_8.wav 33
68
+ wavs_audio1526984083509_1_7.wav 31
69
+ wavs_audio1526984106151_3_2.wav 13
70
+ wavs_audio1526984114132_5_1.wav 27
71
+ wavs_audio1526984121674_3_3.wav 15
72
+ wavs_audio1526984123594_2_4.wav 21
73
+ wavs_audio1526984135088_2_5.wav 14
74
+ wavs_audio1526984139552_3_4.wav 13
75
+ wavs_audio1526984144689_2_6.wav 20
76
+ wavs_audio1526984147192_1_1.wav 18
77
+ wavs_audio1526984148728_2_4.wav 21
78
+ wavs_audio1526984153209_2_7.wav 20
79
+ wavs_audio1526984156770_3_5.wav 20
80
+ wavs_audio1526984169153_3_6.wav 20
81
+ wavs_audio1526984171836_2_1.wav 23
82
+ wavs_audio1526984173406_2_5.wav 14
83
+ wavs_audio1526984173931_5_3.wav 29
84
+ wavs_audio1526984179972_3_1.wav 15
85
+ wavs_audio1526984186071_3_7.wav 13
86
+ wavs_audio1526984186573_2_2.wav 23
87
+ wavs_audio1526984197610_2_3.wav 21
88
+ wavs_audio1526984200561_3_2.wav 13
89
+ wavs_audio1526984206448_5_5.wav 27
90
+ wavs_audio1526984214431_3_3.wav 15
91
+ wavs_audio1526984217633_3_8.wav 13
92
+ wavs_audio1526984223567_1_1.wav 18
93
+ wavs_audio1526984236297_1_2.wav 15
94
+ wavs_audio1526984237031_3_4.wav 13
95
+ wavs_audio1526984241114_5_7.wav 29
96
+ wavs_audio1526984243264_1_4.wav 20
97
+ wavs_audio1526984245216_2_2.wav 23
98
+ wavs_audio1526984248224_1_3.wav 9
99
+ wavs_audio1526984249624_3_5.wav 20
100
+ wavs_audio1526984255903_2_5.wav 14
101
+ wavs_audio1526984256425_3_6.wav 20
102
+ wavs_audio1526984262616_2_2.wav 23
103
+ wavs_audio1526984265407_1_4.wav 20
104
+ wavs_audio1526984281593_6_1.wav 21
105
+ wavs_audio1526984282940_1_6.wav 11
106
+ wavs_audio1526984295750_2_7.wav 20
107
+ wavs_audio1526984296338_4_1.wav 18
108
+ wavs_audio1526984301655_1_6.wav 11
109
+ wavs_audio1526984307908_1_7.wav 31
110
+ wavs_audio1526984311940_3_2.wav 13
111
+ wavs_audio1526984318859_4_2.wav 20
112
+ wavs_audio1526984321249_3_1.wav 15
113
+ wavs_audio1526984323422_5_1.wav 27
114
+ wavs_audio1526984323982_4_1.wav 18
115
+ wavs_audio1526984326235_1_7.wav 31
116
+ wavs_audio1526984327222_4_3.wav 18
117
+ wavs_audio1526984333916_4_4.wav 12
118
+ wavs_audio1526984334159_4_2.wav 20
119
+ wavs_audio1526984337198_1_8.wav 33
120
+ wavs_audio1526984337797_6_3.wav 33
121
+ wavs_audio1526984345066_4_5.wav 20
122
+ wavs_audio1526984345363_4_3.wav 18
123
+ wavs_audio1526984349176_6_3.wav 33
124
+ wavs_audio1526984351152_3_3.wav 15
125
+ wavs_audio1526984354681_4_4.wav 12
126
+ wavs_audio1526984354872_2_4.wav 21
127
+ wavs_audio1526984357564_6_4.wav 35
128
+ wavs_audio1526984363716_4_5.wav 20
129
+ wavs_audio1526984365423_2_1.wav 23
130
+ wavs_audio1526984372167_3_4.wav 13
131
+ wavs_audio1526984375689_2_2.wav 23
132
+ wavs_audio1526984377956_2_1.wav 23
133
+ wavs_audio1526984381963_5_1.wav 27
134
+ wavs_audio1526984384912_3_5.wav 20
135
+ wavs_audio1526984392258_2_3.wav 21
136
+ wavs_audio1526984400068_6_4.wav 35
137
+ wavs_audio1526984402818_5_2.wav 32
138
+ wavs_audio1526984404232_5_2.wav 32
139
+ wavs_audio1526984407407_3_6.wav 20
140
+ wavs_audio1526984414330_2_6.wav 20
141
+ wavs_audio1526984416571_5_3.wav 29
142
+ wavs_audio1526984417944_2_2.wav 23
143
+ wavs_audio1526984421235_3_7.wav 13
144
+ wavs_audio1526984422523_2_7.wav 20
145
+ wavs_audio1526984426143_5_4.wav 29
146
+ wavs_audio1526984433191_2_3.wav 21
147
+ wavs_audio1526984435465_5_5.wav 27
148
+ wavs_audio1526984443372_5_6.wav 27
149
+ wavs_audio1526984444218_5_5.wav 27
150
+ wavs_audio1526984446328_3_8.wav 13
151
+ wavs_audio1526984455383_2_4.wav 21
152
+ wavs_audio1526984456002_3_7.wav 13
153
+ wavs_audio1526984456727_3_2.wav 13
154
+ wavs_audio1526984460345_5_6.wav 27
155
+ wavs_audio1526984469365_3_8.wav 13
156
+ wavs_audio1526984482150_3_3.wav 15
157
+ wavs_audio1526984485261_2_5.wav 14
158
+ wavs_audio1526984487880_4_2.wav 20
159
+ wavs_audio1526984492899_4_1.wav 18
160
+ wavs_audio1526984497103_5_7.wav 29
161
+ wavs_audio1526984500341_2_6.wav 20
162
+ wavs_audio1526984502455_6_1.wav 21
163
+ wavs_audio1526984504361_3_5.wav 20
164
+ wavs_audio1526984504722_4_4.wav 12
165
+ wavs_audio1526984506678_4_2.wav 20
166
+ wavs_audio1526984512402_6_2.wav 23
167
+ wavs_audio1526984513900_2_7.wav 20
168
+ wavs_audio1526984518655_4_1.wav 18
169
+ wavs_audio1526984523862_6_3.wav 33
170
+ wavs_audio1526984525492_4_3.wav 18
171
+ wavs_audio1526984528468_4_2.wav 20
172
+ wavs_audio1526984533901_6_4.wav 35
173
+ wavs_audio1526984535229_3_1.wav 15
174
+ wavs_audio1526984536286_4_3.wav 18
175
+ wavs_audio1526984538200_5_1.wav 27
176
+ wavs_audio1526984540680_4_4.wav 12
177
+ wavs_audio1526984546639_4_4.wav 12
178
+ wavs_audio1526984547590_5_2.wav 32
179
+ wavs_audio1526984552838_6_3.wav 33
180
+ wavs_audio1526984556510_4_5.wav 20
181
+ wavs_audio1526984559307_3_3.wav 15
182
+ wavs_audio1526984569143_5_1.wav 27
183
+ wavs_audio1526984570131_5_3.wav 29
184
+ wavs_audio1526984573839_6_4.wav 35
185
+ wavs_audio1526984575227_3_4.wav 13
186
+ wavs_audio1526984577387_5_1.wav 27
187
+ wavs_audio1526984579412_5_2.wav 32
188
+ wavs_audio1526984586922_5_3.wav 29
189
+ wavs_audio1526984588951_3_5.wav 20
190
+ wavs_audio1526984591785_5_2.wav 32
191
+ wavs_audio1526984595756_5_4.wav 29
exp/asr_stats_raw_en_word/logdir/stats.10/valid/batch_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ speech
2
+ text
exp/asr_stats_raw_en_word/logdir/stats.10/valid/feats_lengths_stats.npz ADDED
Binary file (778 Bytes). View file
 
exp/asr_stats_raw_en_word/logdir/stats.10/valid/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/asr_stats_raw_en_word/logdir/stats.10/valid/speech_shape ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1526983286623_5_7.wav 43200
2
+ wavs_audio1526983324951_5_3.wav 90240
3
+ wavs_audio1526983385142_4_5.wav 48000
4
+ wavs_audio1526983478995_4_3.wav 50880
5
+ wavs_audio1526983516946_5_1.wav 60480
6
+ wavs_audio1526983544466_5_3.wav 67200
7
+ wavs_audio1526983563031_3_8.wav 36480
8
+ wavs_audio1526983564943_3_6.wav 41280
9
+ wavs_audio1526983592590_4_2.wav 40320
10
+ wavs_audio1526983663160_6_3.wav 60480
11
+ wavs_audio1526983920025_1_1.wav 80640
12
+ wavs_audio1526984094471_3_1.wav 41280
13
+ wavs_audio1526984098387_1_7.wav 77760
14
+ wavs_audio1526984102607_1_8.wav 72000
15
+ wavs_audio1526984124218_1_8.wav 76800
16
+ wavs_audio1526984189062_1_5.wav 79680
17
+ wavs_audio1526984191576_5_4.wav 71040
18
+ wavs_audio1526984208550_1_5.wav 53760
19
+ wavs_audio1526984218128_2_7.wav 59520
20
+ wavs_audio1526984225517_5_6.wav 73920
21
+ wavs_audio1526984236438_2_5.wav 63360
22
+ wavs_audio1526984273150_2_6.wav 57600
23
+ wavs_audio1526984301196_6_2.wav 69120
24
+ wavs_audio1526984345604_3_2.wav 45120
exp/asr_stats_raw_en_word/logdir/stats.10/valid/stats_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ feats
2
+ feats_lengths
exp/asr_stats_raw_en_word/logdir/stats.10/valid/text_shape ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1526983286623_5_7.wav 29
2
+ wavs_audio1526983324951_5_3.wav 29
3
+ wavs_audio1526983385142_4_5.wav 20
4
+ wavs_audio1526983478995_4_3.wav 18
5
+ wavs_audio1526983516946_5_1.wav 27
6
+ wavs_audio1526983544466_5_3.wav 29
7
+ wavs_audio1526983563031_3_8.wav 13
8
+ wavs_audio1526983564943_3_6.wav 20
9
+ wavs_audio1526983592590_4_2.wav 20
10
+ wavs_audio1526983663160_6_3.wav 33
11
+ wavs_audio1526983920025_1_1.wav 18
12
+ wavs_audio1526984094471_3_1.wav 15
13
+ wavs_audio1526984098387_1_7.wav 31
14
+ wavs_audio1526984102607_1_8.wav 33
15
+ wavs_audio1526984124218_1_8.wav 33
16
+ wavs_audio1526984189062_1_5.wav 17
17
+ wavs_audio1526984191576_5_4.wav 29
18
+ wavs_audio1526984208550_1_5.wav 17
19
+ wavs_audio1526984218128_2_7.wav 20
20
+ wavs_audio1526984225517_5_6.wav 27
21
+ wavs_audio1526984236438_2_5.wav 14
22
+ wavs_audio1526984273150_2_6.wav 20
23
+ wavs_audio1526984301196_6_2.wav 23
24
+ wavs_audio1526984345604_3_2.wav 13
exp/asr_stats_raw_en_word/logdir/stats.11.log ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Running on r099.ib.bridges2.psc.edu
2
+ # Started at Tue Dec 21 22:24:57 EST 2021
3
+ # SLURMD_NODENAME=r099
4
+ # SLURM_ARRAY_JOB_ID=5730432
5
+ # SLURM_ARRAY_TASK_COUNT=32
6
+ # SLURM_ARRAY_TASK_ID=11
7
+ # SLURM_ARRAY_TASK_MAX=32
8
+ # SLURM_ARRAY_TASK_MIN=1
9
+ # SLURM_ARRAY_TASK_STEP=1
10
+ # SLURM_CLUSTER_NAME=bridges2
11
+ # SLURM_CONF=/var/spool/slurm/d/conf-cache/slurm.conf
12
+ # SLURM_CPUS_ON_NODE=1
13
+ # SLURM_EXPORT_ENV=PATH
14
+ # SLURM_GET_USER_ENV=1
15
+ # SLURM_GTIDS=0
16
+ # SLURM_JOBID=5730464
17
+ # SLURM_JOB_ACCOUNT=cis210027p
18
+ # SLURM_JOB_CPUS_PER_NODE=1
19
+ # SLURM_JOB_GID=24886
20
+ # SLURM_JOB_ID=5730464
21
+ # SLURM_JOB_NAME=stats.sh
22
+ # SLURM_JOB_NODELIST=r099
23
+ # SLURM_JOB_NUM_NODES=1
24
+ # SLURM_JOB_PARTITION=RM-shared
25
+ # SLURM_JOB_QOS=rm
26
+ # SLURM_JOB_UID=82326
27
+ # SLURM_JOB_USER=ganesank
28
+ # SLURM_LOCALID=0
29
+ # SLURM_MEM_PER_CPU=2000
30
+ # SLURM_NNODES=1
31
+ # SLURM_NODEID=0
32
+ # SLURM_NODELIST=r099
33
+ # SLURM_NODE_ALIASES='(null)'
34
+ # SLURM_OPEN_MODE=a
35
+ # SLURM_PRIO_PROCESS=0
36
+ # SLURM_PROCID=0
37
+ # SLURM_SUBMIT_DIR=/ocean/projects/cis210027p/ganesank/karthik_new/espnet/egs2/sinhala/asr1
38
+ # SLURM_SUBMIT_HOST=br012.ib.bridges2.psc.edu
39
+ # SLURM_TASKS_PER_NODE=1
40
+ # SLURM_TASK_PID=6420
41
+ # SLURM_TOPOLOGY_ADDR=r099
42
+ # SLURM_TOPOLOGY_ADDR_PATTERN=node
43
+ # SLURM_WORKING_CLUSTER=bridges2:br003:6814:9216:109
44
+ # python3 -m espnet2.bin.asr_train --collect_stats true --use_preprocessor true --bpemodel none --token_type word --token_list data/en_token_list/word/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train/text,text,text --valid_data_path_and_name_and_type dump/raw/valid/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/valid/text,text,text --train_shape_file exp/asr_stats_raw_en_word/logdir/train.11.scp --valid_shape_file exp/asr_stats_raw_en_word/logdir/valid.11.scp --output_dir exp/asr_stats_raw_en_word/logdir/stats.11 --config conf/train_asr.yaml --frontend_conf fs=16k
45
+ /ocean/projects/cis210027p/ganesank/karthik_new/espnet/tools/venv/bin/python3 /ocean/projects/cis210027p/ganesank/karthik_new/espnet/espnet2/bin/asr_train.py --collect_stats true --use_preprocessor true --bpemodel none --token_type word --token_list data/en_token_list/word/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train/text,text,text --valid_data_path_and_name_and_type dump/raw/valid/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/valid/text,text,text --train_shape_file exp/asr_stats_raw_en_word/logdir/train.11.scp --valid_shape_file exp/asr_stats_raw_en_word/logdir/valid.11.scp --output_dir exp/asr_stats_raw_en_word/logdir/stats.11 --config conf/train_asr.yaml --frontend_conf fs=16k
46
+ [r099] 2021-12-21 22:25:08,042 (asr:382) INFO: Vocabulary size: 40
47
+ [r099] 2021-12-21 22:25:09,019 (abs_task:1132) INFO: pytorch.version=1.8.1+cu102, cuda.available=False, cudnn.version=7605, cudnn.benchmark=False, cudnn.deterministic=True
48
+ [r099] 2021-12-21 22:25:09,024 (abs_task:1133) INFO: Model structure:
49
+ ESPnetASRModel(
50
+ (frontend): DefaultFrontend(
51
+ (stft): Stft(n_fft=512, win_length=512, hop_length=128, center=True, normalized=False, onesided=True)
52
+ (frontend): Frontend()
53
+ (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
54
+ )
55
+ (specaug): SpecAug(
56
+ (time_warp): TimeWarp(window=5, mode=bicubic)
57
+ (freq_mask): MaskAlongAxis(mask_width_range=[0, 30], num_mask=2, axis=freq)
58
+ (time_mask): MaskAlongAxis(mask_width_range=[0, 40], num_mask=2, axis=time)
59
+ )
60
+ (normalize): UtteranceMVN(norm_means=True, norm_vars=False)
61
+ (encoder): TransformerEncoder(
62
+ (embed): Conv2dSubsampling(
63
+ (conv): Sequential(
64
+ (0): Conv2d(1, 256, kernel_size=(3, 3), stride=(2, 2))
65
+ (1): ReLU()
66
+ (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2))
67
+ (3): ReLU()
68
+ )
69
+ (out): Sequential(
70
+ (0): Linear(in_features=4864, out_features=256, bias=True)
71
+ (1): PositionalEncoding(
72
+ (dropout): Dropout(p=0.1, inplace=False)
73
+ )
74
+ )
75
+ )
76
+ (encoders): MultiSequential(
77
+ (0): EncoderLayer(
78
+ (self_attn): MultiHeadedAttention(
79
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
80
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
81
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
82
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
83
+ (dropout): Dropout(p=0.0, inplace=False)
84
+ )
85
+ (feed_forward): PositionwiseFeedForward(
86
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
87
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
88
+ (dropout): Dropout(p=0.1, inplace=False)
89
+ (activation): ReLU()
90
+ )
91
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
92
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
93
+ (dropout): Dropout(p=0.1, inplace=False)
94
+ )
95
+ (1): EncoderLayer(
96
+ (self_attn): MultiHeadedAttention(
97
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
98
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
99
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
100
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
101
+ (dropout): Dropout(p=0.0, inplace=False)
102
+ )
103
+ (feed_forward): PositionwiseFeedForward(
104
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
105
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
106
+ (dropout): Dropout(p=0.1, inplace=False)
107
+ (activation): ReLU()
108
+ )
109
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
110
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
111
+ (dropout): Dropout(p=0.1, inplace=False)
112
+ )
113
+ (2): EncoderLayer(
114
+ (self_attn): MultiHeadedAttention(
115
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
116
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
117
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
118
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
119
+ (dropout): Dropout(p=0.0, inplace=False)
120
+ )
121
+ (feed_forward): PositionwiseFeedForward(
122
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
123
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
124
+ (dropout): Dropout(p=0.1, inplace=False)
125
+ (activation): ReLU()
126
+ )
127
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
128
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
129
+ (dropout): Dropout(p=0.1, inplace=False)
130
+ )
131
+ (3): EncoderLayer(
132
+ (self_attn): MultiHeadedAttention(
133
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
134
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
135
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
136
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
137
+ (dropout): Dropout(p=0.0, inplace=False)
138
+ )
139
+ (feed_forward): PositionwiseFeedForward(
140
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
141
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
142
+ (dropout): Dropout(p=0.1, inplace=False)
143
+ (activation): ReLU()
144
+ )
145
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
146
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
147
+ (dropout): Dropout(p=0.1, inplace=False)
148
+ )
149
+ (4): EncoderLayer(
150
+ (self_attn): MultiHeadedAttention(
151
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
152
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
153
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
154
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
155
+ (dropout): Dropout(p=0.0, inplace=False)
156
+ )
157
+ (feed_forward): PositionwiseFeedForward(
158
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
159
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
160
+ (dropout): Dropout(p=0.1, inplace=False)
161
+ (activation): ReLU()
162
+ )
163
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
164
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
165
+ (dropout): Dropout(p=0.1, inplace=False)
166
+ )
167
+ (5): EncoderLayer(
168
+ (self_attn): MultiHeadedAttention(
169
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
170
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
171
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
172
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
173
+ (dropout): Dropout(p=0.0, inplace=False)
174
+ )
175
+ (feed_forward): PositionwiseFeedForward(
176
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
177
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
178
+ (dropout): Dropout(p=0.1, inplace=False)
179
+ (activation): ReLU()
180
+ )
181
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
182
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
183
+ (dropout): Dropout(p=0.1, inplace=False)
184
+ )
185
+ (6): EncoderLayer(
186
+ (self_attn): MultiHeadedAttention(
187
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
188
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
189
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
190
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
191
+ (dropout): Dropout(p=0.0, inplace=False)
192
+ )
193
+ (feed_forward): PositionwiseFeedForward(
194
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
195
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
196
+ (dropout): Dropout(p=0.1, inplace=False)
197
+ (activation): ReLU()
198
+ )
199
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
200
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
201
+ (dropout): Dropout(p=0.1, inplace=False)
202
+ )
203
+ (7): EncoderLayer(
204
+ (self_attn): MultiHeadedAttention(
205
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
206
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
207
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
208
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
209
+ (dropout): Dropout(p=0.0, inplace=False)
210
+ )
211
+ (feed_forward): PositionwiseFeedForward(
212
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
213
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
214
+ (dropout): Dropout(p=0.1, inplace=False)
215
+ (activation): ReLU()
216
+ )
217
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
218
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
219
+ (dropout): Dropout(p=0.1, inplace=False)
220
+ )
221
+ (8): EncoderLayer(
222
+ (self_attn): MultiHeadedAttention(
223
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
224
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
225
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
226
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
227
+ (dropout): Dropout(p=0.0, inplace=False)
228
+ )
229
+ (feed_forward): PositionwiseFeedForward(
230
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
231
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
232
+ (dropout): Dropout(p=0.1, inplace=False)
233
+ (activation): ReLU()
234
+ )
235
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
236
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
237
+ (dropout): Dropout(p=0.1, inplace=False)
238
+ )
239
+ (9): EncoderLayer(
240
+ (self_attn): MultiHeadedAttention(
241
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
242
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
243
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
244
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
245
+ (dropout): Dropout(p=0.0, inplace=False)
246
+ )
247
+ (feed_forward): PositionwiseFeedForward(
248
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
249
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
250
+ (dropout): Dropout(p=0.1, inplace=False)
251
+ (activation): ReLU()
252
+ )
253
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
254
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
255
+ (dropout): Dropout(p=0.1, inplace=False)
256
+ )
257
+ (10): EncoderLayer(
258
+ (self_attn): MultiHeadedAttention(
259
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
260
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
261
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
262
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
263
+ (dropout): Dropout(p=0.0, inplace=False)
264
+ )
265
+ (feed_forward): PositionwiseFeedForward(
266
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
267
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
268
+ (dropout): Dropout(p=0.1, inplace=False)
269
+ (activation): ReLU()
270
+ )
271
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
272
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
273
+ (dropout): Dropout(p=0.1, inplace=False)
274
+ )
275
+ (11): EncoderLayer(
276
+ (self_attn): MultiHeadedAttention(
277
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
278
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
279
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
280
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
281
+ (dropout): Dropout(p=0.0, inplace=False)
282
+ )
283
+ (feed_forward): PositionwiseFeedForward(
284
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
285
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
286
+ (dropout): Dropout(p=0.1, inplace=False)
287
+ (activation): ReLU()
288
+ )
289
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
290
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
291
+ (dropout): Dropout(p=0.1, inplace=False)
292
+ )
293
+ )
294
+ (after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
295
+ )
296
+ (decoder): TransformerDecoder(
297
+ (embed): Sequential(
298
+ (0): Embedding(40, 256)
299
+ (1): PositionalEncoding(
300
+ (dropout): Dropout(p=0.1, inplace=False)
301
+ )
302
+ )
303
+ (after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
304
+ (output_layer): Linear(in_features=256, out_features=40, bias=True)
305
+ (decoders): MultiSequential(
306
+ (0): DecoderLayer(
307
+ (self_attn): MultiHeadedAttention(
308
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
309
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
310
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
311
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
312
+ (dropout): Dropout(p=0.0, inplace=False)
313
+ )
314
+ (src_attn): MultiHeadedAttention(
315
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
316
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
317
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
318
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
319
+ (dropout): Dropout(p=0.0, inplace=False)
320
+ )
321
+ (feed_forward): PositionwiseFeedForward(
322
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
323
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
324
+ (dropout): Dropout(p=0.1, inplace=False)
325
+ (activation): ReLU()
326
+ )
327
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
328
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
329
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
330
+ (dropout): Dropout(p=0.1, inplace=False)
331
+ )
332
+ (1): DecoderLayer(
333
+ (self_attn): MultiHeadedAttention(
334
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
335
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
336
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
337
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
338
+ (dropout): Dropout(p=0.0, inplace=False)
339
+ )
340
+ (src_attn): MultiHeadedAttention(
341
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
342
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
343
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
344
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
345
+ (dropout): Dropout(p=0.0, inplace=False)
346
+ )
347
+ (feed_forward): PositionwiseFeedForward(
348
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
349
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
350
+ (dropout): Dropout(p=0.1, inplace=False)
351
+ (activation): ReLU()
352
+ )
353
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
354
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
355
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
356
+ (dropout): Dropout(p=0.1, inplace=False)
357
+ )
358
+ (2): DecoderLayer(
359
+ (self_attn): MultiHeadedAttention(
360
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
361
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
362
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
363
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
364
+ (dropout): Dropout(p=0.0, inplace=False)
365
+ )
366
+ (src_attn): MultiHeadedAttention(
367
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
368
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
369
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
370
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
371
+ (dropout): Dropout(p=0.0, inplace=False)
372
+ )
373
+ (feed_forward): PositionwiseFeedForward(
374
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
375
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
376
+ (dropout): Dropout(p=0.1, inplace=False)
377
+ (activation): ReLU()
378
+ )
379
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
380
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
381
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
382
+ (dropout): Dropout(p=0.1, inplace=False)
383
+ )
384
+ (3): DecoderLayer(
385
+ (self_attn): MultiHeadedAttention(
386
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
387
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
388
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
389
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
390
+ (dropout): Dropout(p=0.0, inplace=False)
391
+ )
392
+ (src_attn): MultiHeadedAttention(
393
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
394
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
395
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
396
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
397
+ (dropout): Dropout(p=0.0, inplace=False)
398
+ )
399
+ (feed_forward): PositionwiseFeedForward(
400
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
401
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
402
+ (dropout): Dropout(p=0.1, inplace=False)
403
+ (activation): ReLU()
404
+ )
405
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
406
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
407
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
408
+ (dropout): Dropout(p=0.1, inplace=False)
409
+ )
410
+ (4): DecoderLayer(
411
+ (self_attn): MultiHeadedAttention(
412
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
413
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
414
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
415
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
416
+ (dropout): Dropout(p=0.0, inplace=False)
417
+ )
418
+ (src_attn): MultiHeadedAttention(
419
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
420
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
421
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
422
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
423
+ (dropout): Dropout(p=0.0, inplace=False)
424
+ )
425
+ (feed_forward): PositionwiseFeedForward(
426
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
427
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
428
+ (dropout): Dropout(p=0.1, inplace=False)
429
+ (activation): ReLU()
430
+ )
431
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
432
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
433
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
434
+ (dropout): Dropout(p=0.1, inplace=False)
435
+ )
436
+ (5): DecoderLayer(
437
+ (self_attn): MultiHeadedAttention(
438
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
439
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
440
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
441
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
442
+ (dropout): Dropout(p=0.0, inplace=False)
443
+ )
444
+ (src_attn): MultiHeadedAttention(
445
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
446
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
447
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
448
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
449
+ (dropout): Dropout(p=0.0, inplace=False)
450
+ )
451
+ (feed_forward): PositionwiseFeedForward(
452
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
453
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
454
+ (dropout): Dropout(p=0.1, inplace=False)
455
+ (activation): ReLU()
456
+ )
457
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
458
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
459
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
460
+ (dropout): Dropout(p=0.1, inplace=False)
461
+ )
462
+ )
463
+ )
464
+ (ctc): CTC(
465
+ (ctc_lo): Linear(in_features=256, out_features=40, bias=True)
466
+ (ctc_loss): CTCLoss()
467
+ )
468
+ (criterion_att): LabelSmoothingLoss(
469
+ (criterion): KLDivLoss()
470
+ )
471
+ )
472
+
473
+ Model summary:
474
+ Class Name: ESPnetASRModel
475
+ Total Number of model parameters: 27.12 M
476
+ Number of trainable parameters: 27.12 M (100.0%)
477
+ Size: 108.49 MB
478
+ Type: torch.float32
479
+ [r099] 2021-12-21 22:25:09,024 (abs_task:1136) INFO: Optimizer:
480
+ Adam (
481
+ Parameter Group 0
482
+ amsgrad: False
483
+ betas: (0.9, 0.999)
484
+ eps: 1e-08
485
+ initial_lr: 0.0002
486
+ lr: 8e-09
487
+ weight_decay: 0
488
+ )
489
+ [r099] 2021-12-21 22:25:09,024 (abs_task:1137) INFO: Scheduler: WarmupLR(warmup_steps=25000)
490
+ [r099] 2021-12-21 22:25:09,027 (abs_task:1146) INFO: Saving the configuration in exp/asr_stats_raw_en_word/logdir/stats.11/config.yaml
491
+ [r099] 2021-12-21 22:25:09,037 (abs_task:1157) INFO: Namespace(config='conf/train_asr.yaml', print_config=False, log_level='INFO', dry_run=False, iterator_type='sequence', output_dir='exp/asr_stats_raw_en_word/logdir/stats.11', ngpu=0, seed=0, num_workers=1, num_att_plot=3, dist_backend='nccl', dist_init_method='env://', dist_world_size=None, dist_rank=None, local_rank=None, dist_master_addr=None, dist_master_port=None, dist_launcher=None, multiprocessing_distributed=False, unused_parameters=False, sharded_ddp=False, cudnn_enabled=True, cudnn_benchmark=False, cudnn_deterministic=True, collect_stats=True, write_collected_feats=False, max_epoch=50, patience=None, val_scheduler_criterion=('valid', 'loss'), early_stopping_criterion=('valid', 'loss', 'min'), best_model_criterion=[('train', 'loss', 'min'), ('valid', 'loss', 'min'), ('train', 'acc', 'max'), ('valid', 'acc', 'max')], keep_nbest_models=5, grad_clip=5.0, grad_clip_type=2.0, grad_noise=False, accum_grad=1, no_forward_run=False, resume=False, train_dtype='float32', use_amp=False, log_interval=None, use_tensorboard=True, use_wandb=False, wandb_project=None, wandb_id=None, wandb_entity=None, wandb_name=None, wandb_model_log_interval=-1, detect_anomaly=False, pretrain_path=None, init_param=[], ignore_init_mismatch=False, freeze_param=[], num_iters_per_epoch=None, batch_size=20, valid_batch_size=None, batch_bins=1000000, valid_batch_bins=None, train_shape_file=['exp/asr_stats_raw_en_word/logdir/train.11.scp'], valid_shape_file=['exp/asr_stats_raw_en_word/logdir/valid.11.scp'], batch_type='folded', valid_batch_type=None, fold_length=[], sort_in_batch='descending', sort_batch='descending', multiple_iterator=False, chunk_length=500, chunk_shift_ratio=0.5, num_cache_chunks=1024, train_data_path_and_name_and_type=[('dump/raw/train/wav.scp', 'speech', 'sound'), ('dump/raw/train/text', 'text', 'text')], valid_data_path_and_name_and_type=[('dump/raw/valid/wav.scp', 'speech', 'sound'), ('dump/raw/valid/text', 'text', 
'text')], allow_variable_data_keys=False, max_cache_size=0.0, max_cache_fd=32, valid_max_cache_size=None, optim='adam', optim_conf={'lr': 0.0002}, scheduler='warmuplr', scheduler_conf={'warmup_steps': 25000}, token_list=['<blank>', '<unk>', '්', 'න', 'ම', 'ක', 'ල', 'ි', 'ු', 'ග', 'ේ', 'ර', 'ත', 'ද', 'ව', 'ට', 'ඕ', 'ී', 'ප', 'ය', 'ෙ', 'ස', 'ණ', 'ා', 'ැ', 'RequestAcc.balance', 'Moneywithdraw', 'Moneydeposit', 'Moneytransfer', 'Billpayments', 'බ', 'ඉ', 'ශ', 'ෂ', 'ඩ', 'Creditcardpayments', 'එ', '\u200d', 'හ', '<sos/eos>'], init=None, input_size=None, ctc_conf={'dropout_rate': 0.0, 'ctc_type': 'builtin', 'reduce': True, 'ignore_nan_grad': True}, model_conf={'ctc_weight': 0.5, 'ignore_id': -1, 'lsm_weight': 0.0, 'length_normalized_loss': False, 'report_cer': True, 'report_wer': True, 'sym_space': '<space>', 'sym_blank': '<blank>', 'extract_feats_in_collect_stats': True}, use_preprocessor=True, token_type='word', bpemodel=None, non_linguistic_symbols=None, cleaner=None, g2p=None, speech_volume_normalize=None, rir_scp=None, rir_apply_prob=1.0, noise_scp=None, noise_apply_prob=1.0, noise_db_range='13_15', frontend='default', frontend_conf={'fs': '16k'}, specaug='specaug', specaug_conf={'apply_time_warp': True, 'time_warp_window': 5, 'time_warp_mode': 'bicubic', 'apply_freq_mask': True, 'freq_mask_width_range': [0, 30], 'num_freq_mask': 2, 'apply_time_mask': True, 'time_mask_width_range': [0, 40], 'num_time_mask': 2}, normalize='utterance_mvn', normalize_conf={}, preencoder=None, preencoder_conf={}, encoder='transformer', encoder_conf={'output_size': 256, 'attention_heads': 4, 'linear_units': 2048, 'num_blocks': 12, 'dropout_rate': 0.1, 'positional_dropout_rate': 0.1, 'attention_dropout_rate': 0.0, 'input_layer': 'conv2d', 'normalize_before': True}, postencoder=None, postencoder_conf={}, decoder='transformer', decoder_conf={'attention_heads': 4, 'linear_units': 2048, 'num_blocks': 6, 'dropout_rate': 0.1, 'positional_dropout_rate': 0.1, 'self_attention_dropout_rate': 0.0, 
'src_attention_dropout_rate': 0.0}, required=['output_dir', 'token_list'], version='0.10.3a3', distributed=False)
492
+ # Accounting: begin_time=1640143497
493
+ # Accounting: end_time=1640143519
494
+ # Accounting: time=22 threads=1
495
+ # Finished at Tue Dec 21 22:25:19 EST 2021 with status 0
exp/asr_stats_raw_en_word/logdir/stats.11/config.yaml ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_stats_raw_en_word/logdir/stats.11
7
+ ngpu: 0
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: null
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: true
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - train
38
+ - loss
39
+ - min
40
+ - - valid
41
+ - loss
42
+ - min
43
+ - - train
44
+ - acc
45
+ - max
46
+ - - valid
47
+ - acc
48
+ - max
49
+ keep_nbest_models: 5
50
+ grad_clip: 5.0
51
+ grad_clip_type: 2.0
52
+ grad_noise: false
53
+ accum_grad: 1
54
+ no_forward_run: false
55
+ resume: false
56
+ train_dtype: float32
57
+ use_amp: false
58
+ log_interval: null
59
+ use_tensorboard: true
60
+ use_wandb: false
61
+ wandb_project: null
62
+ wandb_id: null
63
+ wandb_entity: null
64
+ wandb_name: null
65
+ wandb_model_log_interval: -1
66
+ detect_anomaly: false
67
+ pretrain_path: null
68
+ init_param: []
69
+ ignore_init_mismatch: false
70
+ freeze_param: []
71
+ num_iters_per_epoch: null
72
+ batch_size: 20
73
+ valid_batch_size: null
74
+ batch_bins: 1000000
75
+ valid_batch_bins: null
76
+ train_shape_file:
77
+ - exp/asr_stats_raw_en_word/logdir/train.11.scp
78
+ valid_shape_file:
79
+ - exp/asr_stats_raw_en_word/logdir/valid.11.scp
80
+ batch_type: folded
81
+ valid_batch_type: null
82
+ fold_length: []
83
+ sort_in_batch: descending
84
+ sort_batch: descending
85
+ multiple_iterator: false
86
+ chunk_length: 500
87
+ chunk_shift_ratio: 0.5
88
+ num_cache_chunks: 1024
89
+ train_data_path_and_name_and_type:
90
+ - - dump/raw/train/wav.scp
91
+ - speech
92
+ - sound
93
+ - - dump/raw/train/text
94
+ - text
95
+ - text
96
+ valid_data_path_and_name_and_type:
97
+ - - dump/raw/valid/wav.scp
98
+ - speech
99
+ - sound
100
+ - - dump/raw/valid/text
101
+ - text
102
+ - text
103
+ allow_variable_data_keys: false
104
+ max_cache_size: 0.0
105
+ max_cache_fd: 32
106
+ valid_max_cache_size: null
107
+ optim: adam
108
+ optim_conf:
109
+ lr: 0.0002
110
+ scheduler: warmuplr
111
+ scheduler_conf:
112
+ warmup_steps: 25000
113
+ token_list:
114
+ - <blank>
115
+ - <unk>
116
+ - ්
117
+ - න
118
+ - ම
119
+ - ක
120
+ - ල
121
+ - ි
122
+ - ු
123
+ - ග
124
+ - ේ
125
+ - ර
126
+ - ත
127
+ - ද
128
+ - ව
129
+ - ට
130
+ - ඕ
131
+ - ී
132
+ - ප
133
+ - ය
134
+ - ෙ
135
+ - ස
136
+ - ණ
137
+ - ා
138
+ - ැ
139
+ - RequestAcc.balance
140
+ - Moneywithdraw
141
+ - Moneydeposit
142
+ - Moneytransfer
143
+ - Billpayments
144
+ - බ
145
+ - ඉ
146
+ - ශ
147
+ - ෂ
148
+ - ඩ
149
+ - Creditcardpayments
150
+ - එ
151
+ - ‍
152
+ - හ
153
+ - <sos/eos>
154
+ init: null
155
+ input_size: null
156
+ ctc_conf:
157
+ dropout_rate: 0.0
158
+ ctc_type: builtin
159
+ reduce: true
160
+ ignore_nan_grad: true
161
+ model_conf:
162
+ ctc_weight: 0.5
163
+ ignore_id: -1
164
+ lsm_weight: 0.0
165
+ length_normalized_loss: false
166
+ report_cer: true
167
+ report_wer: true
168
+ sym_space: <space>
169
+ sym_blank: <blank>
170
+ extract_feats_in_collect_stats: true
171
+ use_preprocessor: true
172
+ token_type: word
173
+ bpemodel: null
174
+ non_linguistic_symbols: null
175
+ cleaner: null
176
+ g2p: null
177
+ speech_volume_normalize: null
178
+ rir_scp: null
179
+ rir_apply_prob: 1.0
180
+ noise_scp: null
181
+ noise_apply_prob: 1.0
182
+ noise_db_range: '13_15'
183
+ frontend: default
184
+ frontend_conf:
185
+ fs: 16k
186
+ specaug: specaug
187
+ specaug_conf:
188
+ apply_time_warp: true
189
+ time_warp_window: 5
190
+ time_warp_mode: bicubic
191
+ apply_freq_mask: true
192
+ freq_mask_width_range:
193
+ - 0
194
+ - 30
195
+ num_freq_mask: 2
196
+ apply_time_mask: true
197
+ time_mask_width_range:
198
+ - 0
199
+ - 40
200
+ num_time_mask: 2
201
+ normalize: utterance_mvn
202
+ normalize_conf: {}
203
+ preencoder: null
204
+ preencoder_conf: {}
205
+ encoder: transformer
206
+ encoder_conf:
207
+ output_size: 256
208
+ attention_heads: 4
209
+ linear_units: 2048
210
+ num_blocks: 12
211
+ dropout_rate: 0.1
212
+ positional_dropout_rate: 0.1
213
+ attention_dropout_rate: 0.0
214
+ input_layer: conv2d
215
+ normalize_before: true
216
+ postencoder: null
217
+ postencoder_conf: {}
218
+ decoder: transformer
219
+ decoder_conf:
220
+ attention_heads: 4
221
+ linear_units: 2048
222
+ num_blocks: 6
223
+ dropout_rate: 0.1
224
+ positional_dropout_rate: 0.1
225
+ self_attention_dropout_rate: 0.0
226
+ src_attention_dropout_rate: 0.0
227
+ required:
228
+ - output_dir
229
+ - token_list
230
+ version: 0.10.3a3
231
+ distributed: false
exp/asr_stats_raw_en_word/logdir/stats.11/train/batch_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ speech
2
+ text
exp/asr_stats_raw_en_word/logdir/stats.11/train/feats_lengths_stats.npz ADDED
Binary file (778 Bytes). View file
 
exp/asr_stats_raw_en_word/logdir/stats.11/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/asr_stats_raw_en_word/logdir/stats.11/train/speech_shape ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1526984602629_3_6.wav 51840
2
+ wavs_audio1526984606896_5_5.wav 61440
3
+ wavs_audio1526984610105_5_3.wav 57600
4
+ wavs_audio1526984615099_3_7.wav 38400
5
+ wavs_audio1526984621281_5_6.wav 77760
6
+ wavs_audio1526984629715_5_7.wav 48960
7
+ wavs_audio1526984636403_5_7.wav 62400
8
+ wavs_audio1526984643040_6_1.wav 46080
9
+ wavs_audio1526984650962_6_2.wav 47040
10
+ wavs_audio1526984652622_4_1.wav 46080
11
+ wavs_audio1526984654940_1_1.wav 107520
12
+ wavs_audio1526984655280_5_5.wav 60480
13
+ wavs_audio1526984661490_6_3.wav 50880
14
+ wavs_audio1526984663485_6_1.wav 82560
15
+ wavs_audio1526984670577_5_6.wav 65280
16
+ wavs_audio1526984676449_6_4.wav 51840
17
+ wavs_audio1526984685159_5_7.wav 59520
18
+ wavs_audio1526984715970_1_3.wav 70080
19
+ wavs_audio1526984729286_6_2.wav 59520
20
+ wavs_audio1526984733224_6_1.wav 98880
21
+ wavs_audio1526984737083_1_4.wav 71040
22
+ wavs_audio1526984743061_6_3.wav 58560
23
+ wavs_audio1526984747137_1_1.wav 81600
24
+ wavs_audio1526984750940_4_1.wav 31680
25
+ wavs_audio1526984753531_1_5.wav 65280
26
+ wavs_audio1526984759961_6_4.wav 75840
27
+ wavs_audio1526984760580_6_3.wav 59520
28
+ wavs_audio1526984767669_1_2.wav 74880
29
+ wavs_audio1526984769766_1_6.wav 49920
30
+ wavs_audio1526984772542_4_2.wav 88320
31
+ wavs_audio1526984782069_1_3.wav 40320
32
+ wavs_audio1526984788067_4_3.wav 52800
33
+ wavs_audio1526984796332_1_4.wav 53760
34
+ wavs_audio1526984812263_4_5.wav 38400
35
+ wavs_audio1526984827811_2_1.wav 70080
36
+ wavs_audio1526984836904_5_1.wav 76800
37
+ wavs_audio1526984851904_5_2.wav 66240
38
+ wavs_audio1526984854445_2_3.wav 78720
39
+ wavs_audio1526984863287_1_7.wav 69120
40
+ wavs_audio1526984867484_2_4.wav 83520
41
+ wavs_audio1526984874070_5_3.wav 54720
42
+ wavs_audio1526984876105_1_8.wav 66240
43
+ wavs_audio1526984884127_2_5.wav 63360
44
+ wavs_audio1526984889424_5_4.wav 56640
45
+ wavs_audio1526984897049_2_6.wav 67200
46
+ wavs_audio1526984918482_2_7.wav 76800
47
+ wavs_audio1526984937456_2_3.wav 40320
48
+ wavs_audio1526984940663_5_4.wav 37440
49
+ wavs_audio1526984953188_1_1.wav 60480
50
+ wavs_audio1526984954166_2_2.wav 56640
51
+ wavs_audio1526984955453_1_1.wav 65280
52
+ wavs_audio1526984966116_2_3.wav 56640
53
+ wavs_audio1526984966716_1_2.wav 35520
54
+ wavs_audio1526984979832_1_3.wav 33600
55
+ wavs_audio1526984984486_3_2.wav 63360
56
+ wavs_audio1526984987570_2_5.wav 63360
57
+ wavs_audio1526984989387_1_3.wav 50880
58
+ wavs_audio1526984997187_5_5.wav 61440
59
+ wavs_audio1526985010117_2_6.wav 46080
60
+ wavs_audio1526985013186_5_6.wav 54720
61
+ wavs_audio1526985016193_1_5.wav 36480
62
+ wavs_audio1526985021031_2_7.wav 51840
63
+ wavs_audio1526985022105_1_5.wav 61440
64
+ wavs_audio1526985026556_3_5.wav 72000
65
+ wavs_audio1526985026940_1_6.wav 27840
66
+ wavs_audio1526985027741_5_7.wav 52800
67
+ wavs_audio1526985034297_1_6.wav 45120
68
+ wavs_audio1526985037569_1_7.wav 40320
69
+ wavs_audio1526985038725_3_6.wav 73920
70
+ wavs_audio1526985048499_1_7.wav 69120
71
+ wavs_audio1526985051021_1_8.wav 52800
72
+ wavs_audio1526985056080_3_2.wav 47040
73
+ wavs_audio1526985062105_1_8.wav 67200
74
+ wavs_audio1526985063328_2_4.wav 36480
75
+ wavs_audio1526985066949_3_3.wav 48960
76
+ wavs_audio1526985075041_3_8.wav 73920
77
+ wavs_audio1526985076424_6_1.wav 64320
78
+ wavs_audio1526985078460_2_4.wav 57600
79
+ wavs_audio1526985099760_4_1.wav 75840
80
+ wavs_audio1526985114121_3_6.wav 50880
81
+ wavs_audio1526985114292_4_2.wav 72000
82
+ wavs_audio1526985125749_3_7.wav 41280
83
+ wavs_audio1526985125890_4_3.wav 78720
84
+ wavs_audio1526985133561_3_1.wav 102720
85
+ wavs_audio1526985139204_3_8.wav 38400
86
+ wavs_audio1526985140297_2_6.wav 48960
87
+ wavs_audio1526985145800_4_4.wav 51840
88
+ wavs_audio1526985154609_1_2.wav 55680
89
+ wavs_audio1526985154890_3_3.wav 59520
90
+ wavs_audio1526985160301_6_2.wav 42240
91
+ wavs_audio1526985163521_3_4.wav 27840
92
+ wavs_audio1526985165400_2_7.wav 57600
93
+ wavs_audio1526985168489_1_3.wav 26880
94
+ wavs_audio1526985181875_3_5.wav 31680
95
+ wavs_audio1526985182786_6_3.wav 59520
96
+ wavs_audio1526985191547_3_6.wav 31680
97
+ wavs_audio1526985192554_4_4.wav 48000
98
+ wavs_audio1526985192555_4_4.wav 48000
99
+ wavs_audio1526985198227_6_4.wav 56640
100
+ wavs_audio1526985199398_3_7.wav 29760
101
+ wavs_audio1526985200453_5_1.wav 86400
102
+ wavs_audio1526985203912_4_5.wav 48960
103
+ wavs_audio1526985208375_3_8.wav 24960
104
+ wavs_audio1526985212377_1_5.wav 31680
105
+ wavs_audio1526985219145_3_3.wav 49920
106
+ wavs_audio1526985221101_1_6.wav 48960
107
+ wavs_audio1526985223090_5_2.wav 83520
108
+ wavs_audio1526985229351_4_2.wav 39360
109
+ wavs_audio1526985237111_3_4.wav 54720
110
+ wavs_audio1526985239172_5_3.wav 79680
111
+ wavs_audio1526985248309_5_2.wav 69120
112
+ wavs_audio1526985249863_3_5.wav 51840
113
+ wavs_audio1526985253528_1_8.wav 51840
114
+ wavs_audio1526985254519_4_3.wav 27840
115
+ wavs_audio1526985273327_5_5.wav 83520
116
+ wavs_audio1526985279112_5_4.wav 72000
117
+ wavs_audio1526985281186_3_7.wav 44160
118
+ wavs_audio1526985288394_4_5.wav 33600
119
+ wavs_audio1526985289109_2_1.wav 41280
120
+ wavs_audio1526985291276_3_8.wav 46080
121
+ wavs_audio1526985293509_5_5.wav 61440
122
+ wavs_audio1526985301946_2_2.wav 56640
123
+ wavs_audio1526985308192_1_5.wav 63360
124
+ wavs_audio1526985308781_1_2.wav 69120
125
+ wavs_audio1526985312760_2_3.wav 48960
126
+ wavs_audio1526985315798_1_3.wav 56640
127
+ wavs_audio1526985321737_1_6.wav 50880
128
+ wavs_audio1526985321887_2_4.wav 53760
129
+ wavs_audio1526985326172_1_3.wav 51840
130
+ wavs_audio1526985333614_1_7.wav 75840
131
+ wavs_audio1526985334281_4_3.wav 48960
132
+ wavs_audio1526985336955_5_7.wav 67200
133
+ wavs_audio1526985347270_1_8.wav 74880
134
+ wavs_audio1526985352099_2_7.wav 49920
135
+ wavs_audio1526985363089_1_6.wav 51840
136
+ wavs_audio1526985369835_6_1.wav 72000
137
+ wavs_audio1526985370370_5_4.wav 42240
138
+ wavs_audio1526985372359_1_1.wav 63360
139
+ wavs_audio1526985373862_3_1.wav 84480
140
+ wavs_audio1526985386612_5_5.wav 46080
141
+ wavs_audio1526985387973_6_2.wav 89280
142
+ wavs_audio1526985394561_5_1.wav 58560
143
+ wavs_audio1526985396204_5_6.wav 45120
144
+ wavs_audio1526985396337_3_6.wav 37440
145
+ wavs_audio1526985404906_1_8.wav 61440
146
+ wavs_audio1526985412257_2_1.wav 46080
147
+ wavs_audio1526985415661_5_7.wav 98880
148
+ wavs_audio1526985416723_6_4.wav 80640
149
+ wavs_audio1526985425547_5_6.wav 72960
150
+ wavs_audio1526985436519_5_7.wav 64320
151
+ wavs_audio1526985437223_6_1.wav 43200
152
+ wavs_audio1526985443344_2_5.wav 44160
153
+ wavs_audio1526985447143_4_3.wav 36480
154
+ wavs_audio1526985449392_6_2.wav 57600
155
+ wavs_audio1526985452841_6_1.wav 58560
156
+ wavs_audio1526985453486_2_6.wav 46080
157
+ wavs_audio1526985455235_4_4.wav 29760
158
+ wavs_audio1526985457216_1_5.wav 66240
159
+ wavs_audio1526985462205_4_5.wav 41280
160
+ wavs_audio1526985464114_2_7.wav 50880
161
+ wavs_audio1526985469912_1_4.wav 61440
162
+ wavs_audio1526985473990_6_3.wav 65280
163
+ wavs_audio1526985477187_6_4.wav 49920
164
+ wavs_audio1526985482607_1_2.wav 38400
165
+ wavs_audio1526985482769_5_5.wav 56640
166
+ wavs_audio1526985484101_1_5.wav 53760
167
+ wavs_audio1526985492075_1_3.wav 28800
168
+ wavs_audio1526985492938_5_6.wav 58560
169
+ wavs_audio1526985500156_1_4.wav 44160
170
+ wavs_audio1526985504537_5_7.wav 55680
171
+ wavs_audio1526985507902_1_7.wav 66240
172
+ wavs_audio1526985509955_1_5.wav 37440
173
+ wavs_audio1526985511640_1_7.wav 70080
174
+ wavs_audio1526985516630_1_6.wav 24000
175
+ wavs_audio1526985525691_6_1.wav 47040
176
+ wavs_audio1526985526757_1_8.wav 67200
177
+ wavs_audio1526985538442_6_2.wav 53760
178
+ wavs_audio1526985539179_6_1.wav 46080
179
+ wavs_audio1526985546134_2_4.wav 47040
180
+ wavs_audio1526985548388_4_1.wav 48960
181
+ wavs_audio1526985549115_6_2.wav 55680
182
+ wavs_audio1526985550464_1_8.wav 57600
183
+ wavs_audio1526985552389_1_1.wav 98880
184
+ wavs_audio1526985555793_2_5.wav 52800
185
+ wavs_audio1526985564579_2_6.wav 40320
186
+ wavs_audio1526985568317_6_4.wav 65280
187
+ wavs_audio1526985574403_2_7.wav 46080
188
+ wavs_audio1526985577889_1_2.wav 93120
189
+ wavs_audio1526985586019_4_3.wav 44160
190
+ wavs_audio1526985604764_3_3.wav 37440
191
+ wavs_audio1526985612198_4_4.wav 40320
exp/asr_stats_raw_en_word/logdir/stats.11/train/stats_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ feats
2
+ feats_lengths
exp/asr_stats_raw_en_word/logdir/stats.11/train/text_shape ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1526984602629_3_6.wav 20
2
+ wavs_audio1526984606896_5_5.wav 27
3
+ wavs_audio1526984610105_5_3.wav 29
4
+ wavs_audio1526984615099_3_7.wav 13
5
+ wavs_audio1526984621281_5_6.wav 27
6
+ wavs_audio1526984629715_5_7.wav 29
7
+ wavs_audio1526984636403_5_7.wav 29
8
+ wavs_audio1526984643040_6_1.wav 21
9
+ wavs_audio1526984650962_6_2.wav 23
10
+ wavs_audio1526984652622_4_1.wav 18
11
+ wavs_audio1526984654940_1_1.wav 18
12
+ wavs_audio1526984655280_5_5.wav 27
13
+ wavs_audio1526984661490_6_3.wav 33
14
+ wavs_audio1526984663485_6_1.wav 21
15
+ wavs_audio1526984670577_5_6.wav 27
16
+ wavs_audio1526984676449_6_4.wav 35
17
+ wavs_audio1526984685159_5_7.wav 29
18
+ wavs_audio1526984715970_1_3.wav 9
19
+ wavs_audio1526984729286_6_2.wav 23
20
+ wavs_audio1526984733224_6_1.wav 21
21
+ wavs_audio1526984737083_1_4.wav 20
22
+ wavs_audio1526984743061_6_3.wav 33
23
+ wavs_audio1526984747137_1_1.wav 18
24
+ wavs_audio1526984750940_4_1.wav 18
25
+ wavs_audio1526984753531_1_5.wav 17
26
+ wavs_audio1526984759961_6_4.wav 35
27
+ wavs_audio1526984760580_6_3.wav 33
28
+ wavs_audio1526984767669_1_2.wav 15
29
+ wavs_audio1526984769766_1_6.wav 11
30
+ wavs_audio1526984772542_4_2.wav 20
31
+ wavs_audio1526984782069_1_3.wav 9
32
+ wavs_audio1526984788067_4_3.wav 18
33
+ wavs_audio1526984796332_1_4.wav 20
34
+ wavs_audio1526984812263_4_5.wav 20
35
+ wavs_audio1526984827811_2_1.wav 23
36
+ wavs_audio1526984836904_5_1.wav 27
37
+ wavs_audio1526984851904_5_2.wav 32
38
+ wavs_audio1526984854445_2_3.wav 21
39
+ wavs_audio1526984863287_1_7.wav 31
40
+ wavs_audio1526984867484_2_4.wav 21
41
+ wavs_audio1526984874070_5_3.wav 29
42
+ wavs_audio1526984876105_1_8.wav 33
43
+ wavs_audio1526984884127_2_5.wav 14
44
+ wavs_audio1526984889424_5_4.wav 29
45
+ wavs_audio1526984897049_2_6.wav 20
46
+ wavs_audio1526984918482_2_7.wav 20
47
+ wavs_audio1526984937456_2_3.wav 21
48
+ wavs_audio1526984940663_5_4.wav 29
49
+ wavs_audio1526984953188_1_1.wav 18
50
+ wavs_audio1526984954166_2_2.wav 23
51
+ wavs_audio1526984955453_1_1.wav 18
52
+ wavs_audio1526984966116_2_3.wav 21
53
+ wavs_audio1526984966716_1_2.wav 15
54
+ wavs_audio1526984979832_1_3.wav 9
55
+ wavs_audio1526984984486_3_2.wav 13
56
+ wavs_audio1526984987570_2_5.wav 14
57
+ wavs_audio1526984989387_1_3.wav 9
58
+ wavs_audio1526984997187_5_5.wav 27
59
+ wavs_audio1526985010117_2_6.wav 20
60
+ wavs_audio1526985013186_5_6.wav 27
61
+ wavs_audio1526985016193_1_5.wav 17
62
+ wavs_audio1526985021031_2_7.wav 20
63
+ wavs_audio1526985022105_1_5.wav 17
64
+ wavs_audio1526985026556_3_5.wav 20
65
+ wavs_audio1526985026940_1_6.wav 11
66
+ wavs_audio1526985027741_5_7.wav 29
67
+ wavs_audio1526985034297_1_6.wav 11
68
+ wavs_audio1526985037569_1_7.wav 31
69
+ wavs_audio1526985038725_3_6.wav 20
70
+ wavs_audio1526985048499_1_7.wav 31
71
+ wavs_audio1526985051021_1_8.wav 33
72
+ wavs_audio1526985056080_3_2.wav 13
73
+ wavs_audio1526985062105_1_8.wav 33
74
+ wavs_audio1526985063328_2_4.wav 21
75
+ wavs_audio1526985066949_3_3.wav 15
76
+ wavs_audio1526985075041_3_8.wav 13
77
+ wavs_audio1526985076424_6_1.wav 21
78
+ wavs_audio1526985078460_2_4.wav 21
79
+ wavs_audio1526985099760_4_1.wav 18
80
+ wavs_audio1526985114121_3_6.wav 20
81
+ wavs_audio1526985114292_4_2.wav 20
82
+ wavs_audio1526985125749_3_7.wav 13
83
+ wavs_audio1526985125890_4_3.wav 18
84
+ wavs_audio1526985133561_3_1.wav 15
85
+ wavs_audio1526985139204_3_8.wav 13
86
+ wavs_audio1526985140297_2_6.wav 20
87
+ wavs_audio1526985145800_4_4.wav 12
88
+ wavs_audio1526985154609_1_2.wav 15
89
+ wavs_audio1526985154890_3_3.wav 15
90
+ wavs_audio1526985160301_6_2.wav 23
91
+ wavs_audio1526985163521_3_4.wav 13
92
+ wavs_audio1526985165400_2_7.wav 20
93
+ wavs_audio1526985168489_1_3.wav 9
94
+ wavs_audio1526985181875_3_5.wav 20
95
+ wavs_audio1526985182786_6_3.wav 33
96
+ wavs_audio1526985191547_3_6.wav 20
97
+ wavs_audio1526985192554_4_4.wav 12
98
+ wavs_audio1526985192555_4_4.wav 12
99
+ wavs_audio1526985198227_6_4.wav 35
100
+ wavs_audio1526985199398_3_7.wav 13
101
+ wavs_audio1526985200453_5_1.wav 27
102
+ wavs_audio1526985203912_4_5.wav 20
103
+ wavs_audio1526985208375_3_8.wav 13
104
+ wavs_audio1526985212377_1_5.wav 17
105
+ wavs_audio1526985219145_3_3.wav 15
106
+ wavs_audio1526985221101_1_6.wav 11
107
+ wavs_audio1526985223090_5_2.wav 32
108
+ wavs_audio1526985229351_4_2.wav 20
109
+ wavs_audio1526985237111_3_4.wav 13
110
+ wavs_audio1526985239172_5_3.wav 29
111
+ wavs_audio1526985248309_5_2.wav 32
112
+ wavs_audio1526985249863_3_5.wav 20
113
+ wavs_audio1526985253528_1_8.wav 33
114
+ wavs_audio1526985254519_4_3.wav 18
115
+ wavs_audio1526985273327_5_5.wav 27
116
+ wavs_audio1526985279112_5_4.wav 29
117
+ wavs_audio1526985281186_3_7.wav 13
118
+ wavs_audio1526985288394_4_5.wav 20
119
+ wavs_audio1526985289109_2_1.wav 23
120
+ wavs_audio1526985291276_3_8.wav 13
121
+ wavs_audio1526985293509_5_5.wav 27
122
+ wavs_audio1526985301946_2_2.wav 23
123
+ wavs_audio1526985308192_1_5.wav 17
124
+ wavs_audio1526985308781_1_2.wav 15
125
+ wavs_audio1526985312760_2_3.wav 21
126
+ wavs_audio1526985315798_1_3.wav 9
127
+ wavs_audio1526985321737_1_6.wav 11
128
+ wavs_audio1526985321887_2_4.wav 21
129
+ wavs_audio1526985326172_1_3.wav 9
130
+ wavs_audio1526985333614_1_7.wav 31
131
+ wavs_audio1526985334281_4_3.wav 18
132
+ wavs_audio1526985336955_5_7.wav 29
133
+ wavs_audio1526985347270_1_8.wav 33
134
+ wavs_audio1526985352099_2_7.wav 20
135
+ wavs_audio1526985363089_1_6.wav 11
136
+ wavs_audio1526985369835_6_1.wav 21
137
+ wavs_audio1526985370370_5_4.wav 29
138
+ wavs_audio1526985372359_1_1.wav 18
139
+ wavs_audio1526985373862_3_1.wav 15
140
+ wavs_audio1526985386612_5_5.wav 27
141
+ wavs_audio1526985387973_6_2.wav 23
142
+ wavs_audio1526985394561_5_1.wav 27
143
+ wavs_audio1526985396204_5_6.wav 27
144
+ wavs_audio1526985396337_3_6.wav 20
145
+ wavs_audio1526985404906_1_8.wav 33
146
+ wavs_audio1526985412257_2_1.wav 23
147
+ wavs_audio1526985415661_5_7.wav 29
148
+ wavs_audio1526985416723_6_4.wav 35
149
+ wavs_audio1526985425547_5_6.wav 27
150
+ wavs_audio1526985436519_5_7.wav 29
151
+ wavs_audio1526985437223_6_1.wav 21
152
+ wavs_audio1526985443344_2_5.wav 14
153
+ wavs_audio1526985447143_4_3.wav 18
154
+ wavs_audio1526985449392_6_2.wav 23
155
+ wavs_audio1526985452841_6_1.wav 21
156
+ wavs_audio1526985453486_2_6.wav 20
157
+ wavs_audio1526985455235_4_4.wav 12
158
+ wavs_audio1526985457216_1_5.wav 17
159
+ wavs_audio1526985462205_4_5.wav 20
160
+ wavs_audio1526985464114_2_7.wav 20
161
+ wavs_audio1526985469912_1_4.wav 20
162
+ wavs_audio1526985473990_6_3.wav 33
163
+ wavs_audio1526985477187_6_4.wav 35
164
+ wavs_audio1526985482607_1_2.wav 15
165
+ wavs_audio1526985482769_5_5.wav 27
166
+ wavs_audio1526985484101_1_5.wav 17
167
+ wavs_audio1526985492075_1_3.wav 9
168
+ wavs_audio1526985492938_5_6.wav 27
169
+ wavs_audio1526985500156_1_4.wav 20
170
+ wavs_audio1526985504537_5_7.wav 29
171
+ wavs_audio1526985507902_1_7.wav 31
172
+ wavs_audio1526985509955_1_5.wav 17
173
+ wavs_audio1526985511640_1_7.wav 31
174
+ wavs_audio1526985516630_1_6.wav 11
175
+ wavs_audio1526985525691_6_1.wav 21
176
+ wavs_audio1526985526757_1_8.wav 33
177
+ wavs_audio1526985538442_6_2.wav 23
178
+ wavs_audio1526985539179_6_1.wav 21
179
+ wavs_audio1526985546134_2_4.wav 21
180
+ wavs_audio1526985548388_4_1.wav 18
181
+ wavs_audio1526985549115_6_2.wav 23
182
+ wavs_audio1526985550464_1_8.wav 33
183
+ wavs_audio1526985552389_1_1.wav 18
184
+ wavs_audio1526985555793_2_5.wav 14
185
+ wavs_audio1526985564579_2_6.wav 20
186
+ wavs_audio1526985568317_6_4.wav 35
187
+ wavs_audio1526985574403_2_7.wav 20
188
+ wavs_audio1526985577889_1_2.wav 15
189
+ wavs_audio1526985586019_4_3.wav 18
190
+ wavs_audio1526985604764_3_3.wav 15
191
+ wavs_audio1526985612198_4_4.wav 12
exp/asr_stats_raw_en_word/logdir/stats.11/valid/batch_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ speech
2
+ text
exp/asr_stats_raw_en_word/logdir/stats.11/valid/feats_lengths_stats.npz ADDED
Binary file (778 Bytes). View file
 
exp/asr_stats_raw_en_word/logdir/stats.11/valid/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/asr_stats_raw_en_word/logdir/stats.11/valid/speech_shape ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1526984388383_2_1.wav 65280
2
+ wavs_audio1526984404711_2_5.wav 33600
3
+ wavs_audio1526984413796_6_4.wav 96960
4
+ wavs_audio1526984415584_5_3.wav 44160
5
+ wavs_audio1526984449871_3_1.wav 43200
6
+ wavs_audio1526984474571_4_1.wav 56640
7
+ wavs_audio1526984496503_3_4.wav 35520
8
+ wavs_audio1526984588328_5_4.wav 65280
9
+ wavs_audio1526984612454_5_6.wav 55680
10
+ wavs_audio1526984792401_6_4.wav 77760
11
+ wavs_audio1526984800745_4_4.wav 40320
12
+ wavs_audio1526984841890_2_2.wav 72960
13
+ wavs_audio1526984842433_1_6.wav 49920
14
+ wavs_audio1526984971124_3_1.wav 66240
15
+ wavs_audio1526984975810_1_2.wav 55680
16
+ wavs_audio1526984977437_2_4.wav 51840
17
+ wavs_audio1526984992628_1_4.wav 41280
18
+ wavs_audio1526985012725_3_4.wav 60480
19
+ wavs_audio1526985045071_3_1.wav 46080
20
+ wavs_audio1526985090396_2_5.wav 29760
21
+ wavs_audio1526985126422_2_5.wav 47040
22
+ wavs_audio1526985156008_4_5.wav 66240
23
+ wavs_audio1526985267535_1_3.wav 57600
24
+ wavs_audio1526985281042_4_4.wav 20160
exp/asr_stats_raw_en_word/logdir/stats.11/valid/stats_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ feats
2
+ feats_lengths
exp/asr_stats_raw_en_word/logdir/stats.11/valid/text_shape ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1526984388383_2_1.wav 23
2
+ wavs_audio1526984404711_2_5.wav 14
3
+ wavs_audio1526984413796_6_4.wav 35
4
+ wavs_audio1526984415584_5_3.wav 29
5
+ wavs_audio1526984449871_3_1.wav 15
6
+ wavs_audio1526984474571_4_1.wav 18
7
+ wavs_audio1526984496503_3_4.wav 13
8
+ wavs_audio1526984588328_5_4.wav 29
9
+ wavs_audio1526984612454_5_6.wav 27
10
+ wavs_audio1526984792401_6_4.wav 35
11
+ wavs_audio1526984800745_4_4.wav 12
12
+ wavs_audio1526984841890_2_2.wav 23
13
+ wavs_audio1526984842433_1_6.wav 11
14
+ wavs_audio1526984971124_3_1.wav 15
15
+ wavs_audio1526984975810_1_2.wav 15
16
+ wavs_audio1526984977437_2_4.wav 21
17
+ wavs_audio1526984992628_1_4.wav 20
18
+ wavs_audio1526985012725_3_4.wav 13
19
+ wavs_audio1526985045071_3_1.wav 15
20
+ wavs_audio1526985090396_2_5.wav 14
21
+ wavs_audio1526985126422_2_5.wav 14
22
+ wavs_audio1526985156008_4_5.wav 20
23
+ wavs_audio1526985267535_1_3.wav 9
24
+ wavs_audio1526985281042_4_4.wav 12
exp/asr_stats_raw_en_word/logdir/stats.12.log ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Running on r099.ib.bridges2.psc.edu
2
+ # Started at Tue Dec 21 22:25:07 EST 2021
3
+ # SLURMD_NODENAME=r099
4
+ # SLURM_ARRAY_JOB_ID=5730432
5
+ # SLURM_ARRAY_TASK_COUNT=32
6
+ # SLURM_ARRAY_TASK_ID=12
7
+ # SLURM_ARRAY_TASK_MAX=32
8
+ # SLURM_ARRAY_TASK_MIN=1
9
+ # SLURM_ARRAY_TASK_STEP=1
10
+ # SLURM_CLUSTER_NAME=bridges2
11
+ # SLURM_CONF=/var/spool/slurm/d/conf-cache/slurm.conf
12
+ # SLURM_CPUS_ON_NODE=1
13
+ # SLURM_EXPORT_ENV=PATH
14
+ # SLURM_GET_USER_ENV=1
15
+ # SLURM_GTIDS=0
16
+ # SLURM_JOBID=5730465
17
+ # SLURM_JOB_ACCOUNT=cis210027p
18
+ # SLURM_JOB_CPUS_PER_NODE=1
19
+ # SLURM_JOB_GID=24886
20
+ # SLURM_JOB_ID=5730465
21
+ # SLURM_JOB_NAME=stats.sh
22
+ # SLURM_JOB_NODELIST=r099
23
+ # SLURM_JOB_NUM_NODES=1
24
+ # SLURM_JOB_PARTITION=RM-shared
25
+ # SLURM_JOB_QOS=rm
26
+ # SLURM_JOB_UID=82326
27
+ # SLURM_JOB_USER=ganesank
28
+ # SLURM_LOCALID=0
29
+ # SLURM_MEM_PER_CPU=2000
30
+ # SLURM_NNODES=1
31
+ # SLURM_NODEID=0
32
+ # SLURM_NODELIST=r099
33
+ # SLURM_NODE_ALIASES='(null)'
34
+ # SLURM_OPEN_MODE=a
35
+ # SLURM_PRIO_PROCESS=0
36
+ # SLURM_PROCID=0
37
+ # SLURM_SUBMIT_DIR=/ocean/projects/cis210027p/ganesank/karthik_new/espnet/egs2/sinhala/asr1
38
+ # SLURM_SUBMIT_HOST=br012.ib.bridges2.psc.edu
39
+ # SLURM_TASKS_PER_NODE=1
40
+ # SLURM_TASK_PID=7310
41
+ # SLURM_TOPOLOGY_ADDR=r099
42
+ # SLURM_TOPOLOGY_ADDR_PATTERN=node
43
+ # SLURM_WORKING_CLUSTER=bridges2:br003:6814:9216:109
44
+ # python3 -m espnet2.bin.asr_train --collect_stats true --use_preprocessor true --bpemodel none --token_type word --token_list data/en_token_list/word/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train/text,text,text --valid_data_path_and_name_and_type dump/raw/valid/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/valid/text,text,text --train_shape_file exp/asr_stats_raw_en_word/logdir/train.12.scp --valid_shape_file exp/asr_stats_raw_en_word/logdir/valid.12.scp --output_dir exp/asr_stats_raw_en_word/logdir/stats.12 --config conf/train_asr.yaml --frontend_conf fs=16k
45
+ /ocean/projects/cis210027p/ganesank/karthik_new/espnet/tools/venv/bin/python3 /ocean/projects/cis210027p/ganesank/karthik_new/espnet/espnet2/bin/asr_train.py --collect_stats true --use_preprocessor true --bpemodel none --token_type word --token_list data/en_token_list/word/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_data_path_and_name_and_type dump/raw/train/text,text,text --valid_data_path_and_name_and_type dump/raw/valid/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/valid/text,text,text --train_shape_file exp/asr_stats_raw_en_word/logdir/train.12.scp --valid_shape_file exp/asr_stats_raw_en_word/logdir/valid.12.scp --output_dir exp/asr_stats_raw_en_word/logdir/stats.12 --config conf/train_asr.yaml --frontend_conf fs=16k
46
+ [r099] 2021-12-21 22:25:10,190 (asr:382) INFO: Vocabulary size: 40
47
+ [r099] 2021-12-21 22:25:10,529 (abs_task:1132) INFO: pytorch.version=1.8.1+cu102, cuda.available=False, cudnn.version=7605, cudnn.benchmark=False, cudnn.deterministic=True
48
+ [r099] 2021-12-21 22:25:10,534 (abs_task:1133) INFO: Model structure:
49
+ ESPnetASRModel(
50
+ (frontend): DefaultFrontend(
51
+ (stft): Stft(n_fft=512, win_length=512, hop_length=128, center=True, normalized=False, onesided=True)
52
+ (frontend): Frontend()
53
+ (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
54
+ )
55
+ (specaug): SpecAug(
56
+ (time_warp): TimeWarp(window=5, mode=bicubic)
57
+ (freq_mask): MaskAlongAxis(mask_width_range=[0, 30], num_mask=2, axis=freq)
58
+ (time_mask): MaskAlongAxis(mask_width_range=[0, 40], num_mask=2, axis=time)
59
+ )
60
+ (normalize): UtteranceMVN(norm_means=True, norm_vars=False)
61
+ (encoder): TransformerEncoder(
62
+ (embed): Conv2dSubsampling(
63
+ (conv): Sequential(
64
+ (0): Conv2d(1, 256, kernel_size=(3, 3), stride=(2, 2))
65
+ (1): ReLU()
66
+ (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2))
67
+ (3): ReLU()
68
+ )
69
+ (out): Sequential(
70
+ (0): Linear(in_features=4864, out_features=256, bias=True)
71
+ (1): PositionalEncoding(
72
+ (dropout): Dropout(p=0.1, inplace=False)
73
+ )
74
+ )
75
+ )
76
+ (encoders): MultiSequential(
77
+ (0): EncoderLayer(
78
+ (self_attn): MultiHeadedAttention(
79
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
80
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
81
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
82
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
83
+ (dropout): Dropout(p=0.0, inplace=False)
84
+ )
85
+ (feed_forward): PositionwiseFeedForward(
86
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
87
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
88
+ (dropout): Dropout(p=0.1, inplace=False)
89
+ (activation): ReLU()
90
+ )
91
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
92
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
93
+ (dropout): Dropout(p=0.1, inplace=False)
94
+ )
95
+ (1): EncoderLayer(
96
+ (self_attn): MultiHeadedAttention(
97
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
98
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
99
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
100
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
101
+ (dropout): Dropout(p=0.0, inplace=False)
102
+ )
103
+ (feed_forward): PositionwiseFeedForward(
104
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
105
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
106
+ (dropout): Dropout(p=0.1, inplace=False)
107
+ (activation): ReLU()
108
+ )
109
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
110
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
111
+ (dropout): Dropout(p=0.1, inplace=False)
112
+ )
113
+ (2): EncoderLayer(
114
+ (self_attn): MultiHeadedAttention(
115
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
116
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
117
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
118
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
119
+ (dropout): Dropout(p=0.0, inplace=False)
120
+ )
121
+ (feed_forward): PositionwiseFeedForward(
122
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
123
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
124
+ (dropout): Dropout(p=0.1, inplace=False)
125
+ (activation): ReLU()
126
+ )
127
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
128
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
129
+ (dropout): Dropout(p=0.1, inplace=False)
130
+ )
131
+ (3): EncoderLayer(
132
+ (self_attn): MultiHeadedAttention(
133
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
134
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
135
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
136
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
137
+ (dropout): Dropout(p=0.0, inplace=False)
138
+ )
139
+ (feed_forward): PositionwiseFeedForward(
140
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
141
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
142
+ (dropout): Dropout(p=0.1, inplace=False)
143
+ (activation): ReLU()
144
+ )
145
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
146
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
147
+ (dropout): Dropout(p=0.1, inplace=False)
148
+ )
149
+ (4): EncoderLayer(
150
+ (self_attn): MultiHeadedAttention(
151
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
152
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
153
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
154
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
155
+ (dropout): Dropout(p=0.0, inplace=False)
156
+ )
157
+ (feed_forward): PositionwiseFeedForward(
158
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
159
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
160
+ (dropout): Dropout(p=0.1, inplace=False)
161
+ (activation): ReLU()
162
+ )
163
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
164
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
165
+ (dropout): Dropout(p=0.1, inplace=False)
166
+ )
167
+ (5): EncoderLayer(
168
+ (self_attn): MultiHeadedAttention(
169
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
170
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
171
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
172
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
173
+ (dropout): Dropout(p=0.0, inplace=False)
174
+ )
175
+ (feed_forward): PositionwiseFeedForward(
176
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
177
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
178
+ (dropout): Dropout(p=0.1, inplace=False)
179
+ (activation): ReLU()
180
+ )
181
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
182
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
183
+ (dropout): Dropout(p=0.1, inplace=False)
184
+ )
185
+ (6): EncoderLayer(
186
+ (self_attn): MultiHeadedAttention(
187
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
188
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
189
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
190
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
191
+ (dropout): Dropout(p=0.0, inplace=False)
192
+ )
193
+ (feed_forward): PositionwiseFeedForward(
194
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
195
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
196
+ (dropout): Dropout(p=0.1, inplace=False)
197
+ (activation): ReLU()
198
+ )
199
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
200
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
201
+ (dropout): Dropout(p=0.1, inplace=False)
202
+ )
203
+ (7): EncoderLayer(
204
+ (self_attn): MultiHeadedAttention(
205
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
206
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
207
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
208
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
209
+ (dropout): Dropout(p=0.0, inplace=False)
210
+ )
211
+ (feed_forward): PositionwiseFeedForward(
212
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
213
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
214
+ (dropout): Dropout(p=0.1, inplace=False)
215
+ (activation): ReLU()
216
+ )
217
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
218
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
219
+ (dropout): Dropout(p=0.1, inplace=False)
220
+ )
221
+ (8): EncoderLayer(
222
+ (self_attn): MultiHeadedAttention(
223
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
224
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
225
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
226
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
227
+ (dropout): Dropout(p=0.0, inplace=False)
228
+ )
229
+ (feed_forward): PositionwiseFeedForward(
230
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
231
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
232
+ (dropout): Dropout(p=0.1, inplace=False)
233
+ (activation): ReLU()
234
+ )
235
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
236
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
237
+ (dropout): Dropout(p=0.1, inplace=False)
238
+ )
239
+ (9): EncoderLayer(
240
+ (self_attn): MultiHeadedAttention(
241
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
242
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
243
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
244
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
245
+ (dropout): Dropout(p=0.0, inplace=False)
246
+ )
247
+ (feed_forward): PositionwiseFeedForward(
248
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
249
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
250
+ (dropout): Dropout(p=0.1, inplace=False)
251
+ (activation): ReLU()
252
+ )
253
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
254
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
255
+ (dropout): Dropout(p=0.1, inplace=False)
256
+ )
257
+ (10): EncoderLayer(
258
+ (self_attn): MultiHeadedAttention(
259
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
260
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
261
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
262
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
263
+ (dropout): Dropout(p=0.0, inplace=False)
264
+ )
265
+ (feed_forward): PositionwiseFeedForward(
266
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
267
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
268
+ (dropout): Dropout(p=0.1, inplace=False)
269
+ (activation): ReLU()
270
+ )
271
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
272
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
273
+ (dropout): Dropout(p=0.1, inplace=False)
274
+ )
275
+ (11): EncoderLayer(
276
+ (self_attn): MultiHeadedAttention(
277
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
278
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
279
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
280
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
281
+ (dropout): Dropout(p=0.0, inplace=False)
282
+ )
283
+ (feed_forward): PositionwiseFeedForward(
284
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
285
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
286
+ (dropout): Dropout(p=0.1, inplace=False)
287
+ (activation): ReLU()
288
+ )
289
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
290
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
291
+ (dropout): Dropout(p=0.1, inplace=False)
292
+ )
293
+ )
294
+ (after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
295
+ )
296
+ (decoder): TransformerDecoder(
297
+ (embed): Sequential(
298
+ (0): Embedding(40, 256)
299
+ (1): PositionalEncoding(
300
+ (dropout): Dropout(p=0.1, inplace=False)
301
+ )
302
+ )
303
+ (after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
304
+ (output_layer): Linear(in_features=256, out_features=40, bias=True)
305
+ (decoders): MultiSequential(
306
+ (0): DecoderLayer(
307
+ (self_attn): MultiHeadedAttention(
308
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
309
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
310
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
311
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
312
+ (dropout): Dropout(p=0.0, inplace=False)
313
+ )
314
+ (src_attn): MultiHeadedAttention(
315
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
316
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
317
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
318
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
319
+ (dropout): Dropout(p=0.0, inplace=False)
320
+ )
321
+ (feed_forward): PositionwiseFeedForward(
322
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
323
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
324
+ (dropout): Dropout(p=0.1, inplace=False)
325
+ (activation): ReLU()
326
+ )
327
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
328
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
329
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
330
+ (dropout): Dropout(p=0.1, inplace=False)
331
+ )
332
+ (1): DecoderLayer(
333
+ (self_attn): MultiHeadedAttention(
334
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
335
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
336
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
337
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
338
+ (dropout): Dropout(p=0.0, inplace=False)
339
+ )
340
+ (src_attn): MultiHeadedAttention(
341
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
342
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
343
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
344
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
345
+ (dropout): Dropout(p=0.0, inplace=False)
346
+ )
347
+ (feed_forward): PositionwiseFeedForward(
348
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
349
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
350
+ (dropout): Dropout(p=0.1, inplace=False)
351
+ (activation): ReLU()
352
+ )
353
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
354
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
355
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
356
+ (dropout): Dropout(p=0.1, inplace=False)
357
+ )
358
+ (2): DecoderLayer(
359
+ (self_attn): MultiHeadedAttention(
360
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
361
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
362
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
363
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
364
+ (dropout): Dropout(p=0.0, inplace=False)
365
+ )
366
+ (src_attn): MultiHeadedAttention(
367
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
368
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
369
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
370
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
371
+ (dropout): Dropout(p=0.0, inplace=False)
372
+ )
373
+ (feed_forward): PositionwiseFeedForward(
374
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
375
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
376
+ (dropout): Dropout(p=0.1, inplace=False)
377
+ (activation): ReLU()
378
+ )
379
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
380
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
381
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
382
+ (dropout): Dropout(p=0.1, inplace=False)
383
+ )
384
+ (3): DecoderLayer(
385
+ (self_attn): MultiHeadedAttention(
386
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
387
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
388
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
389
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
390
+ (dropout): Dropout(p=0.0, inplace=False)
391
+ )
392
+ (src_attn): MultiHeadedAttention(
393
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
394
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
395
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
396
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
397
+ (dropout): Dropout(p=0.0, inplace=False)
398
+ )
399
+ (feed_forward): PositionwiseFeedForward(
400
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
401
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
402
+ (dropout): Dropout(p=0.1, inplace=False)
403
+ (activation): ReLU()
404
+ )
405
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
406
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
407
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
408
+ (dropout): Dropout(p=0.1, inplace=False)
409
+ )
410
+ (4): DecoderLayer(
411
+ (self_attn): MultiHeadedAttention(
412
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
413
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
414
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
415
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
416
+ (dropout): Dropout(p=0.0, inplace=False)
417
+ )
418
+ (src_attn): MultiHeadedAttention(
419
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
420
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
421
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
422
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
423
+ (dropout): Dropout(p=0.0, inplace=False)
424
+ )
425
+ (feed_forward): PositionwiseFeedForward(
426
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
427
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
428
+ (dropout): Dropout(p=0.1, inplace=False)
429
+ (activation): ReLU()
430
+ )
431
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
432
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
433
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
434
+ (dropout): Dropout(p=0.1, inplace=False)
435
+ )
436
+ (5): DecoderLayer(
437
+ (self_attn): MultiHeadedAttention(
438
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
439
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
440
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
441
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
442
+ (dropout): Dropout(p=0.0, inplace=False)
443
+ )
444
+ (src_attn): MultiHeadedAttention(
445
+ (linear_q): Linear(in_features=256, out_features=256, bias=True)
446
+ (linear_k): Linear(in_features=256, out_features=256, bias=True)
447
+ (linear_v): Linear(in_features=256, out_features=256, bias=True)
448
+ (linear_out): Linear(in_features=256, out_features=256, bias=True)
449
+ (dropout): Dropout(p=0.0, inplace=False)
450
+ )
451
+ (feed_forward): PositionwiseFeedForward(
452
+ (w_1): Linear(in_features=256, out_features=2048, bias=True)
453
+ (w_2): Linear(in_features=2048, out_features=256, bias=True)
454
+ (dropout): Dropout(p=0.1, inplace=False)
455
+ (activation): ReLU()
456
+ )
457
+ (norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
458
+ (norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
459
+ (norm3): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
460
+ (dropout): Dropout(p=0.1, inplace=False)
461
+ )
462
+ )
463
+ )
464
+ (ctc): CTC(
465
+ (ctc_lo): Linear(in_features=256, out_features=40, bias=True)
466
+ (ctc_loss): CTCLoss()
467
+ )
468
+ (criterion_att): LabelSmoothingLoss(
469
+ (criterion): KLDivLoss()
470
+ )
471
+ )
472
+
473
+ Model summary:
474
+ Class Name: ESPnetASRModel
475
+ Total Number of model parameters: 27.12 M
476
+ Number of trainable parameters: 27.12 M (100.0%)
477
+ Size: 108.49 MB
478
+ Type: torch.float32
479
+ [r099] 2021-12-21 22:25:10,534 (abs_task:1136) INFO: Optimizer:
480
+ Adam (
481
+ Parameter Group 0
482
+ amsgrad: False
483
+ betas: (0.9, 0.999)
484
+ eps: 1e-08
485
+ initial_lr: 0.0002
486
+ lr: 8e-09
487
+ weight_decay: 0
488
+ )
489
+ [r099] 2021-12-21 22:25:10,534 (abs_task:1137) INFO: Scheduler: WarmupLR(warmup_steps=25000)
490
+ [r099] 2021-12-21 22:25:10,536 (abs_task:1146) INFO: Saving the configuration in exp/asr_stats_raw_en_word/logdir/stats.12/config.yaml
491
+ [r099] 2021-12-21 22:25:10,546 (abs_task:1157) INFO: Namespace(config='conf/train_asr.yaml', print_config=False, log_level='INFO', dry_run=False, iterator_type='sequence', output_dir='exp/asr_stats_raw_en_word/logdir/stats.12', ngpu=0, seed=0, num_workers=1, num_att_plot=3, dist_backend='nccl', dist_init_method='env://', dist_world_size=None, dist_rank=None, local_rank=None, dist_master_addr=None, dist_master_port=None, dist_launcher=None, multiprocessing_distributed=False, unused_parameters=False, sharded_ddp=False, cudnn_enabled=True, cudnn_benchmark=False, cudnn_deterministic=True, collect_stats=True, write_collected_feats=False, max_epoch=50, patience=None, val_scheduler_criterion=('valid', 'loss'), early_stopping_criterion=('valid', 'loss', 'min'), best_model_criterion=[('train', 'loss', 'min'), ('valid', 'loss', 'min'), ('train', 'acc', 'max'), ('valid', 'acc', 'max')], keep_nbest_models=5, grad_clip=5.0, grad_clip_type=2.0, grad_noise=False, accum_grad=1, no_forward_run=False, resume=False, train_dtype='float32', use_amp=False, log_interval=None, use_tensorboard=True, use_wandb=False, wandb_project=None, wandb_id=None, wandb_entity=None, wandb_name=None, wandb_model_log_interval=-1, detect_anomaly=False, pretrain_path=None, init_param=[], ignore_init_mismatch=False, freeze_param=[], num_iters_per_epoch=None, batch_size=20, valid_batch_size=None, batch_bins=1000000, valid_batch_bins=None, train_shape_file=['exp/asr_stats_raw_en_word/logdir/train.12.scp'], valid_shape_file=['exp/asr_stats_raw_en_word/logdir/valid.12.scp'], batch_type='folded', valid_batch_type=None, fold_length=[], sort_in_batch='descending', sort_batch='descending', multiple_iterator=False, chunk_length=500, chunk_shift_ratio=0.5, num_cache_chunks=1024, train_data_path_and_name_and_type=[('dump/raw/train/wav.scp', 'speech', 'sound'), ('dump/raw/train/text', 'text', 'text')], valid_data_path_and_name_and_type=[('dump/raw/valid/wav.scp', 'speech', 'sound'), ('dump/raw/valid/text', 'text', 
'text')], allow_variable_data_keys=False, max_cache_size=0.0, max_cache_fd=32, valid_max_cache_size=None, optim='adam', optim_conf={'lr': 0.0002}, scheduler='warmuplr', scheduler_conf={'warmup_steps': 25000}, token_list=['<blank>', '<unk>', '්', 'න', 'ම', 'ක', 'ල', 'ි', 'ු', 'ග', 'ේ', 'ර', 'ත', 'ද', 'ව', 'ට', 'ඕ', 'ී', 'ප', 'ය', 'ෙ', 'ස', 'ණ', 'ා', 'ැ', 'RequestAcc.balance', 'Moneywithdraw', 'Moneydeposit', 'Moneytransfer', 'Billpayments', 'බ', 'ඉ', 'ශ', 'ෂ', 'ඩ', 'Creditcardpayments', 'එ', '\u200d', 'හ', '<sos/eos>'], init=None, input_size=None, ctc_conf={'dropout_rate': 0.0, 'ctc_type': 'builtin', 'reduce': True, 'ignore_nan_grad': True}, model_conf={'ctc_weight': 0.5, 'ignore_id': -1, 'lsm_weight': 0.0, 'length_normalized_loss': False, 'report_cer': True, 'report_wer': True, 'sym_space': '<space>', 'sym_blank': '<blank>', 'extract_feats_in_collect_stats': True}, use_preprocessor=True, token_type='word', bpemodel=None, non_linguistic_symbols=None, cleaner=None, g2p=None, speech_volume_normalize=None, rir_scp=None, rir_apply_prob=1.0, noise_scp=None, noise_apply_prob=1.0, noise_db_range='13_15', frontend='default', frontend_conf={'fs': '16k'}, specaug='specaug', specaug_conf={'apply_time_warp': True, 'time_warp_window': 5, 'time_warp_mode': 'bicubic', 'apply_freq_mask': True, 'freq_mask_width_range': [0, 30], 'num_freq_mask': 2, 'apply_time_mask': True, 'time_mask_width_range': [0, 40], 'num_time_mask': 2}, normalize='utterance_mvn', normalize_conf={}, preencoder=None, preencoder_conf={}, encoder='transformer', encoder_conf={'output_size': 256, 'attention_heads': 4, 'linear_units': 2048, 'num_blocks': 12, 'dropout_rate': 0.1, 'positional_dropout_rate': 0.1, 'attention_dropout_rate': 0.0, 'input_layer': 'conv2d', 'normalize_before': True}, postencoder=None, postencoder_conf={}, decoder='transformer', decoder_conf={'attention_heads': 4, 'linear_units': 2048, 'num_blocks': 6, 'dropout_rate': 0.1, 'positional_dropout_rate': 0.1, 'self_attention_dropout_rate': 0.0, 
'src_attention_dropout_rate': 0.0}, required=['output_dir', 'token_list'], version='0.10.3a3', distributed=False)
492
+ # Accounting: begin_time=1640143507
493
+ # Accounting: end_time=1640143519
494
+ # Accounting: time=12 threads=1
495
+ # Finished at Tue Dec 21 22:25:19 EST 2021 with status 0
exp/asr_stats_raw_en_word/logdir/stats.12/config.yaml ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_stats_raw_en_word/logdir/stats.12
7
+ ngpu: 0
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: null
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: true
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - train
38
+ - loss
39
+ - min
40
+ - - valid
41
+ - loss
42
+ - min
43
+ - - train
44
+ - acc
45
+ - max
46
+ - - valid
47
+ - acc
48
+ - max
49
+ keep_nbest_models: 5
50
+ grad_clip: 5.0
51
+ grad_clip_type: 2.0
52
+ grad_noise: false
53
+ accum_grad: 1
54
+ no_forward_run: false
55
+ resume: false
56
+ train_dtype: float32
57
+ use_amp: false
58
+ log_interval: null
59
+ use_tensorboard: true
60
+ use_wandb: false
61
+ wandb_project: null
62
+ wandb_id: null
63
+ wandb_entity: null
64
+ wandb_name: null
65
+ wandb_model_log_interval: -1
66
+ detect_anomaly: false
67
+ pretrain_path: null
68
+ init_param: []
69
+ ignore_init_mismatch: false
70
+ freeze_param: []
71
+ num_iters_per_epoch: null
72
+ batch_size: 20
73
+ valid_batch_size: null
74
+ batch_bins: 1000000
75
+ valid_batch_bins: null
76
+ train_shape_file:
77
+ - exp/asr_stats_raw_en_word/logdir/train.12.scp
78
+ valid_shape_file:
79
+ - exp/asr_stats_raw_en_word/logdir/valid.12.scp
80
+ batch_type: folded
81
+ valid_batch_type: null
82
+ fold_length: []
83
+ sort_in_batch: descending
84
+ sort_batch: descending
85
+ multiple_iterator: false
86
+ chunk_length: 500
87
+ chunk_shift_ratio: 0.5
88
+ num_cache_chunks: 1024
89
+ train_data_path_and_name_and_type:
90
+ - - dump/raw/train/wav.scp
91
+ - speech
92
+ - sound
93
+ - - dump/raw/train/text
94
+ - text
95
+ - text
96
+ valid_data_path_and_name_and_type:
97
+ - - dump/raw/valid/wav.scp
98
+ - speech
99
+ - sound
100
+ - - dump/raw/valid/text
101
+ - text
102
+ - text
103
+ allow_variable_data_keys: false
104
+ max_cache_size: 0.0
105
+ max_cache_fd: 32
106
+ valid_max_cache_size: null
107
+ optim: adam
108
+ optim_conf:
109
+ lr: 0.0002
110
+ scheduler: warmuplr
111
+ scheduler_conf:
112
+ warmup_steps: 25000
113
+ token_list:
114
+ - <blank>
115
+ - <unk>
116
+ - ්
117
+ - න
118
+ - ම
119
+ - ක
120
+ - ල
121
+ - ි
122
+ - ු
123
+ - ග
124
+ - ේ
125
+ - ර
126
+ - ත
127
+ - ද
128
+ - ව
129
+ - ට
130
+ - ඕ
131
+ - ී
132
+ - ප
133
+ - ය
134
+ - ෙ
135
+ - ස
136
+ - ණ
137
+ - ා
138
+ - ැ
139
+ - RequestAcc.balance
140
+ - Moneywithdraw
141
+ - Moneydeposit
142
+ - Moneytransfer
143
+ - Billpayments
144
+ - බ
145
+ - ඉ
146
+ - ශ
147
+ - ෂ
148
+ - ඩ
149
+ - Creditcardpayments
150
+ - එ
151
+ - ‍
152
+ - හ
153
+ - <sos/eos>
154
+ init: null
155
+ input_size: null
156
+ ctc_conf:
157
+ dropout_rate: 0.0
158
+ ctc_type: builtin
159
+ reduce: true
160
+ ignore_nan_grad: true
161
+ model_conf:
162
+ ctc_weight: 0.5
163
+ ignore_id: -1
164
+ lsm_weight: 0.0
165
+ length_normalized_loss: false
166
+ report_cer: true
167
+ report_wer: true
168
+ sym_space: <space>
169
+ sym_blank: <blank>
170
+ extract_feats_in_collect_stats: true
171
+ use_preprocessor: true
172
+ token_type: word
173
+ bpemodel: null
174
+ non_linguistic_symbols: null
175
+ cleaner: null
176
+ g2p: null
177
+ speech_volume_normalize: null
178
+ rir_scp: null
179
+ rir_apply_prob: 1.0
180
+ noise_scp: null
181
+ noise_apply_prob: 1.0
182
+ noise_db_range: '13_15'
183
+ frontend: default
184
+ frontend_conf:
185
+ fs: 16k
186
+ specaug: specaug
187
+ specaug_conf:
188
+ apply_time_warp: true
189
+ time_warp_window: 5
190
+ time_warp_mode: bicubic
191
+ apply_freq_mask: true
192
+ freq_mask_width_range:
193
+ - 0
194
+ - 30
195
+ num_freq_mask: 2
196
+ apply_time_mask: true
197
+ time_mask_width_range:
198
+ - 0
199
+ - 40
200
+ num_time_mask: 2
201
+ normalize: utterance_mvn
202
+ normalize_conf: {}
203
+ preencoder: null
204
+ preencoder_conf: {}
205
+ encoder: transformer
206
+ encoder_conf:
207
+ output_size: 256
208
+ attention_heads: 4
209
+ linear_units: 2048
210
+ num_blocks: 12
211
+ dropout_rate: 0.1
212
+ positional_dropout_rate: 0.1
213
+ attention_dropout_rate: 0.0
214
+ input_layer: conv2d
215
+ normalize_before: true
216
+ postencoder: null
217
+ postencoder_conf: {}
218
+ decoder: transformer
219
+ decoder_conf:
220
+ attention_heads: 4
221
+ linear_units: 2048
222
+ num_blocks: 6
223
+ dropout_rate: 0.1
224
+ positional_dropout_rate: 0.1
225
+ self_attention_dropout_rate: 0.0
226
+ src_attention_dropout_rate: 0.0
227
+ required:
228
+ - output_dir
229
+ - token_list
230
+ version: 0.10.3a3
231
+ distributed: false
exp/asr_stats_raw_en_word/logdir/stats.12/train/batch_keys ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ speech
2
+ text
exp/asr_stats_raw_en_word/logdir/stats.12/train/feats_lengths_stats.npz ADDED
Binary file (778 Bytes). View file
 
exp/asr_stats_raw_en_word/logdir/stats.12/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/asr_stats_raw_en_word/logdir/stats.12/train/speech_shape ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wavs_audio1526985624260_3_5.wav 39360
2
+ wavs_audio1526985631794_3_6.wav 48960
3
+ wavs_audio1526985634840_1_5.wav 65280
4
+ wavs_audio1526985637609_3_7.wav 25920
5
+ wavs_audio1526985654320_3_8.wav 29760
6
+ wavs_audio1526985665007_2_1.wav 56640
7
+ wavs_audio1526985684367_2_2.wav 59520
8
+ wavs_audio1526985684560_4_1.wav 29760
9
+ wavs_audio1526985699919_2_4.wav 69120
10
+ wavs_audio1526985720271_2_6.wav 50880
11
+ wavs_audio1526985748846_3_1.wav 50880
12
+ wavs_audio1526985771667_3_2.wav 32640
13
+ wavs_audio1526985788422_3_3.wav 54720
14
+ wavs_audio1526985811423_3_5.wav 39360
15
+ wavs_audio1526985842900_4_1.wav 41280
16
+ wavs_audio1526985843874_1_5.wav 35520
17
+ wavs_audio1526985854679_1_6.wav 29760
18
+ wavs_audio1526985856978_4_2.wav 41280
19
+ wavs_audio1526985863962_4_3.wav 40320
20
+ wavs_audio1526985866725_1_7.wav 53760
21
+ wavs_audio1526985867124_1_1.wav 60480
22
+ wavs_audio1526985870791_4_4.wav 31680
23
+ wavs_audio1526985880824_4_5.wav 49920
24
+ wavs_audio1526985884692_2_1.wav 35520
25
+ wavs_audio1526985891709_2_2.wav 34560
26
+ wavs_audio1526985919844_5_2.wav 98880
27
+ wavs_audio1526985929522_2_3.wav 43200
28
+ wavs_audio1526985937315_2_4.wav 36480
29
+ wavs_audio1526985944403_2_5.wav 29760
30
+ wavs_audio1526985946985_1_1.wav 46080
31
+ wavs_audio1526985964774_1_2.wav 46080
32
+ wavs_audio1526985970398_1_4.wav 84480
33
+ wavs_audio1526985973382_3_1.wav 26880
34
+ wavs_audio1526985977416_1_1.wav 48960
35
+ wavs_audio1526985989468_3_3.wav 28800
36
+ wavs_audio1526985993949_5_4.wav 48000
37
+ wavs_audio1526985996680_3_4.wav 28800
38
+ wavs_audio1526985999822_1_3.wav 54720
39
+ wavs_audio1526986001645_1_2.wav 52800
40
+ wavs_audio1526986004390_5_5.wav 65280
41
+ wavs_audio1526986006027_3_5.wav 30720
42
+ wavs_audio1526986013569_3_6.wav 32640
43
+ wavs_audio1526986013869_5_6.wav 60480
44
+ wavs_audio1526986017131_1_1.wav 69120
45
+ wavs_audio1526986023134_3_7.wav 28800
46
+ wavs_audio1526986026460_1_4.wav 57600
47
+ wavs_audio1526986028850_1_4.wav 61440
48
+ wavs_audio1526986031364_3_8.wav 30720
49
+ wavs_audio1526986038758_1_5.wav 56640
50
+ wavs_audio1526986045359_1_3.wav 42240
51
+ wavs_audio1526986049128_1_6.wav 48000
52
+ wavs_audio1526986054679_1_4.wav 53760
53
+ wavs_audio1526986056001_1_6.wav 54720
54
+ wavs_audio1526986058107_4_2.wav 36480
55
+ wavs_audio1526986060927_6_2.wav 50880
56
+ wavs_audio1526986062560_1_7.wav 70080
57
+ wavs_audio1526986065613_4_3.wav 27840
58
+ wavs_audio1526986070442_4_4.wav 24000
59
+ wavs_audio1526986073227_1_5.wav 48960
60
+ wavs_audio1526986074610_1_2.wav 51840
61
+ wavs_audio1526986075386_1_7.wav 82560
62
+ wavs_audio1526986094591_5_1.wav 43200
63
+ wavs_audio1526986098023_1_8.wav 81600
64
+ wavs_audio1526986100533_1_3.wav 39360
65
+ wavs_audio1526986101739_2_4.wav 61440
66
+ wavs_audio1526986106691_1_6.wav 45120
67
+ wavs_audio1526986110995_1_1.wav 32640
68
+ wavs_audio1526986112701_2_5.wav 47040
69
+ wavs_audio1526986114724_5_3.wav 36480
70
+ wavs_audio1526986134094_5_5.wav 37440
71
+ wavs_audio1526986134721_1_7.wav 65280
72
+ wavs_audio1526986140693_2_7.wav 48960
73
+ wavs_audio1526986142358_5_6.wav 42240
74
+ wavs_audio1526986145427_1_3.wav 31680
75
+ wavs_audio1526986147839_1_8.wav 65280
76
+ wavs_audio1526986150434_1_6.wav 37440
77
+ wavs_audio1526986156067_2_1.wav 42240
78
+ wavs_audio1526986160452_3_1.wav 52800
79
+ wavs_audio1526986161162_1_4.wav 48960
80
+ wavs_audio1526986168606_2_1.wav 48000
81
+ wavs_audio1526986172550_1_7.wav 87360
82
+ wavs_audio1526986181104_2_2.wav 54720
83
+ wavs_audio1526986186429_1_6.wav 36480
84
+ wavs_audio1526986189325_2_3.wav 44160
85
+ wavs_audio1526986193798_2_2.wav 55680
86
+ wavs_audio1526986199038_2_4.wav 49920
87
+ wavs_audio1526986200639_1_7.wav 60480
88
+ wavs_audio1526986212315_2_5.wav 47040
89
+ wavs_audio1526986213279_1_8.wav 60480
90
+ wavs_audio1526986216840_5_7.wav 32640
91
+ wavs_audio1526986220541_2_6.wav 46080
92
+ wavs_audio1526986237311_2_1.wav 50880
93
+ wavs_audio1526986243940_2_4.wav 80640
94
+ wavs_audio1526986252397_3_2.wav 43200
95
+ wavs_audio1526986253549_1_1.wav 58560
96
+ wavs_audio1526986258147_2_5.wav 48960
97
+ wavs_audio1526986258855_6_2.wav 36480
98
+ wavs_audio1526986267624_2_2.wav 42240
99
+ wavs_audio1526986273630_2_1.wav 49920
100
+ wavs_audio1526986278268_2_3.wav 42240
101
+ wavs_audio1526986282138_2_7.wav 60480
102
+ wavs_audio1526986286341_6_3.wav 40320
103
+ wavs_audio1526986291717_1_1.wav 42240
104
+ wavs_audio1526986294920_3_6.wav 46080
105
+ wavs_audio1526986295948_2_4.wav 38400
106
+ wavs_audio1526986296671_2_3.wav 38400
107
+ wavs_audio1526986300507_6_4.wav 47040
108
+ wavs_audio1526986302631_3_7.wav 44160
109
+ wavs_audio1526986308389_2_5.wav 38400
110
+ wavs_audio1526986310297_3_1.wav 39360
111
+ wavs_audio1526986311003_3_8.wav 51840
112
+ wavs_audio1526986315765_1_1.wav 28800
113
+ wavs_audio1526986321856_3_2.wav 48000
114
+ wavs_audio1526986323874_4_1.wav 46080
115
+ wavs_audio1526986328478_2_4.wav 40320
116
+ wavs_audio1526986332943_4_2.wav 51840
117
+ wavs_audio1526986334059_3_3.wav 72960
118
+ wavs_audio1526986345036_2_7.wav 38400
119
+ wavs_audio1526986346831_4_3.wav 53760
120
+ wavs_audio1526986349586_2_5.wav 38400
121
+ wavs_audio1526986356132_4_4.wav 43200
122
+ wavs_audio1526986356658_3_5.wav 65280
123
+ wavs_audio1526986363308_2_6.wav 36480
124
+ wavs_audio1526986363441_3_2.wav 39360
125
+ wavs_audio1526986364384_4_5.wav 53760
126
+ wavs_audio1526986378346_2_7.wav 35520
127
+ wavs_audio1526986380912_5_1.wav 64320
128
+ wavs_audio1526986383681_1_3.wav 24960
129
+ wavs_audio1526986384092_3_1.wav 37440
130
+ wavs_audio1526986387611_3_4.wav 62400
131
+ wavs_audio1526986389985_1_4.wav 33600
132
+ wavs_audio1526986394615_3_7.wav 55680
133
+ wavs_audio1526986395198_5_2.wav 64320
134
+ wavs_audio1526986399200_3_2.wav 35520
135
+ wavs_audio1526986399414_1_5.wav 28800
136
+ wavs_audio1526986404099_3_8.wav 52800
137
+ wavs_audio1526986404411_5_3.wav 58560
138
+ wavs_audio1526986416370_3_4.wav 29760
139
+ wavs_audio1526986428897_3_5.wav 33600
140
+ wavs_audio1526986430267_3_6.wav 57600
141
+ wavs_audio1526986431551_5_4.wav 57600
142
+ wavs_audio1526986434577_4_1.wav 57600
143
+ wavs_audio1526986435901_3_2.wav 48000
144
+ wavs_audio1526986440680_5_5.wav 60480
145
+ wavs_audio1526986443586_3_7.wav 38400
146
+ wavs_audio1526986445561_3_7.wav 24960
147
+ wavs_audio1526986449787_3_3.wav 80640
148
+ wavs_audio1526986452840_3_8.wav 27840
149
+ wavs_audio1526986453435_3_8.wav 42240
150
+ wavs_audio1526986464469_5_7.wav 91200
151
+ wavs_audio1526986464670_3_4.wav 40320
152
+ wavs_audio1526986474896_1_1.wav 69120
153
+ wavs_audio1526986477588_4_2.wav 37440
154
+ wavs_audio1526986480308_3_5.wav 60480
155
+ wavs_audio1526986481319_6_1.wav 60480
156
+ wavs_audio1526986485336_4_3.wav 30720
157
+ wavs_audio1526986492918_4_4.wav 26880
158
+ wavs_audio1526986494268_3_6.wav 44160
159
+ wavs_audio1526986498705_4_5.wav 33600
160
+ wavs_audio1526986505536_4_3.wav 57600
161
+ wavs_audio1526986514851_6_2.wav 53760
162
+ wavs_audio1526986518301_5_1.wav 43200
163
+ wavs_audio1526986520630_3_8.wav 34560
164
+ wavs_audio1526986524204_6_3.wav 56640
165
+ wavs_audio1526986528126_4_1.wav 42240
166
+ wavs_audio1526986529822_4_5.wav 47040
167
+ wavs_audio1526986534112_1_3.wav 39360
168
+ wavs_audio1526986536567_6_4.wav 61440
169
+ wavs_audio1526986539592_4_3.wav 41280
170
+ wavs_audio1526986541667_4_2.wav 46080
171
+ wavs_audio1526986556588_4_1.wav 31680
172
+ wavs_audio1526986559296_1_4.wav 46080
173
+ wavs_audio1526986567858_4_5.wav 47040
174
+ wavs_audio1526986579068_4_4.wav 31680
175
+ wavs_audio1526986584329_5_2.wav 48960
176
+ wavs_audio1526986588425_1_6.wav 41280
177
+ wavs_audio1526986590238_4_5.wav 52800
178
+ wavs_audio1526986593494_5_1.wav 57600
179
+ wavs_audio1526986599320_5_3.wav 49920
180
+ wavs_audio1526986600296_5_3.wav 71040
181
+ wavs_audio1526986607539_5_2.wav 56640
182
+ wavs_audio1526986608700_5_4.wav 52800
183
+ wavs_audio1526986613380_5_4.wav 64320
184
+ wavs_audio1526986617030_5_1.wav 55680
185
+ wavs_audio1526986623759_5_5.wav 54720
186
+ wavs_audio1526986630976_5_5.wav 52800
187
+ wavs_audio1526986631150_5_2.wav 60480
188
+ wavs_audio1526986637317_5_6.wav 60480
189
+ wavs_audio1526986644558_5_3.wav 55680
190
+ wavs_audio1526986647579_5_7.wav 45120
191
+ wavs_audio1526986649587_5_3.wav 98880