mpc001 committed on
Commit
d319e26
1 Parent(s): 5cebf32

Upload 7 files

Browse files
benchmarks/LRS3/models/LRS3_AV_WER0.9/model.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ 9216,
3
+ 41,
4
+ {
5
+ "a_upsample_ratio": 1,
6
+ "accum_grad": 2,
7
+ "adim": 768,
8
+ "aheads": 12,
9
+ "apply_uttmvn": true,
10
+ "aux_a_upsample_ratio": 1,
11
+ "aux_adim": 768,
12
+ "aux_aheads": 12,
13
+ "aux_cnn_module_kernel": 31,
14
+ "aux_dropout_rate": 0.1,
15
+ "aux_elayers": 12,
16
+ "aux_eunits": 3072,
17
+ "aux_lsm_weight": 0.0,
18
+ "aux_macaron_style": 1,
19
+ "aux_transformer_attn_dropout_rate": 0.1,
20
+ "aux_transformer_encoder_attn_layer_type": "rel_mha",
21
+ "aux_transformer_input_layer": "conv1d",
22
+ "aux_use_cnn_module": 1,
23
+ "backend": "pytorch",
24
+ "badim": 320,
25
+ "batch_bins": 0,
26
+ "batch_count": "auto",
27
+ "batch_frames_in": 0,
28
+ "batch_frames_inout": 0,
29
+ "batch_frames_out": 0,
30
+ "bdropout_rate": 0.0,
31
+ "beam_size": 4,
32
+ "blayers": 2,
33
+ "bnmask": 2,
34
+ "bprojs": 300,
35
+ "btype": "blstmp",
36
+ "bunits": 300,
37
+ "cnn_module_kernel": 31,
38
+ "config2": null,
39
+ "config3": null,
40
+ "context_residual": false,
41
+ "criterion": "acc",
42
+ "ctc_type": "warpctc",
43
+ "ctc_weight": 0.3,
44
+ "debugmode": 1,
45
+ "dec_init": null,
46
+ "dec_init_mods": [
47
+ "att.",
48
+ " dec."
49
+ ],
50
+ "dict": "data/lang_1char/units.txt",
51
+ "dlayers": 6,
52
+ "dropout_rate": 0.1,
53
+ "dunits": 3072,
54
+ "early_stop_criterion": "validation/main/acc",
55
+ "elayers": 12,
56
+ "enc_init": null,
57
+ "enc_init_mods": [
58
+ "enc.enc."
59
+ ],
60
+ "eps": 1e-08,
61
+ "eps_decay": 0.01,
62
+ "eunits": 3072,
63
+ "fbank_fmax": null,
64
+ "fbank_fmin": 0.0,
65
+ "fbank_fs": 16000,
66
+ "fusion_hdim": 8192,
67
+ "fusion_norm": "batchnorm",
68
+ "grad_clip": 5.0,
69
+ "grad_noise": false,
70
+ "labels_type": "unigram5000",
71
+ "lm_weight": 0.1,
72
+ "lsm_weight": 0.1,
73
+ "macaron_style": 1,
74
+ "maxlen_in": 220,
75
+ "maxlen_out": 220,
76
+ "maxlenratio": 0.0,
77
+ "minibatches": 0,
78
+ "minlenratio": 0.0,
79
+ "model_module": "espnet.nets.pytorch_backend.e2e_asr_transformer_multitask_dual:E2E",
80
+ "mtlalpha": 0.1,
81
+ "n_iter_processes": 12,
82
+ "n_mels": 80,
83
+ "nbest": 1,
84
+ "ngpu": 1,
85
+ "num_encs": 1,
86
+ "num_input": 2,
87
+ "num_save_attention": 3,
88
+ "num_spkrs": 1,
89
+ "opt": "noam",
90
+ "patience": 0,
91
+ "penalty": 0.0,
92
+ "preprocess_conf": null,
93
+ "raw_max_freq_width": 150,
94
+ "raw_max_speed_rate": 1.1,
95
+ "raw_max_time_width": 0.4,
96
+ "raw_min_speed_rate": 0.9,
97
+ "raw_n_freq_mask": 2,
98
+ "raw_n_time_mask": 2,
99
+ "raw_speech_do_normalize": false,
100
+ "ref_channel": -1,
101
+ "rel_pos_type": "latest",
102
+ "relu_type": "swish",
103
+ "report_cer": false,
104
+ "report_interval_iters": 100,
105
+ "report_wer": false,
106
+ "rnnlm": null,
107
+ "rnnlm_conf": null,
108
+ "save_interval_iters": 0,
109
+ "seed": 1,
110
+ "sortagrad": 0,
111
+ "specaug_max_freq_width": 30,
112
+ "specaug_max_time_warp": 5,
113
+ "specaug_max_time_width": 40,
114
+ "specaug_n_freq_mask": 2,
115
+ "specaug_n_time_mask": 2,
116
+ "sr_interp_mode": "nearest",
117
+ "sr_interp_scale_factor": 1.0,
118
+ "stats_file": null,
119
+ "sym_blank": "<blank>",
120
+ "sym_space": "<space>",
121
+ "threshold": 0.0001,
122
+ "train_dtype": "float32",
123
+ "transformer_attn_dropout_rate": 0.1,
124
+ "transformer_encoder_attn_layer_type": "rel_mha",
125
+ "transformer_init": "pytorch",
126
+ "transformer_input_layer": "conv3d",
127
+ "transformer_length_normalized_loss": 0,
128
+ "transformer_warmup_steps": 25000,
129
+ "use_beamformer": true,
130
+ "use_cnn_module": 1,
131
+ "use_dnn_mask_for_wpe": false,
132
+ "use_freqmask": false,
133
+ "use_frontend": false,
134
+ "use_noiseaug": false,
135
+ "use_specaug": false,
136
+ "use_speedaug": false,
137
+ "use_timemask": false,
138
+ "use_v_adaptive_timemask": true,
139
+ "use_v_cutout": false,
140
+ "use_v_timemask": false,
141
+ "use_wpe": false,
142
+ "uttmvn_norm_means": true,
143
+ "uttmvn_norm_vars": false,
144
+ "v_cutout_max_hole_length": 22,
145
+ "v_cutout_n_holes": 1,
146
+ "v_raw_max_time_width": 0.4,
147
+ "v_raw_n_time_mask": 1,
148
+ "v_timemask_replace_with_zero": false,
149
+ "v_timemask_stride": 1.0,
150
+ "verbose": 0,
151
+ "wavaugments": null,
152
+ "wdropout_rate": 0.0,
153
+ "weight_decay": 0.0,
154
+ "wlayers": 2,
155
+ "wpe_delay": 3,
156
+ "wpe_taps": 5,
157
+ "wprojs": 300,
158
+ "wtype": "blstmp",
159
+ "wunits": 300,
160
+ "zero_triu": false
161
+ }
162
+ ]
benchmarks/LRS3/models/LRS3_AV_WER0.9/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16304660a2181629215f8c7390565750784379f6e8a2b5c8d155fd9574afcd6e
3
+ size 1776719497
benchmarks/LRS3/models/LRS3_A_WER1.0/model.json ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ 9216,
3
+ 41,
4
+ {
5
+ "a_upsample_ratio": 1,
6
+ "accum_grad": 2,
7
+ "adim": 768,
8
+ "aheads": 12,
9
+ "apply_uttmvn": true,
10
+ "aux_lsm_weight": 0.0,
11
+ "backend": "pytorch",
12
+ "badim": 320,
13
+ "batch_bins": 0,
14
+ "batch_count": "auto",
15
+ "batch_frames_in": 0,
16
+ "batch_frames_inout": 0,
17
+ "batch_frames_out": 0,
18
+ "bdropout_rate": 0.0,
19
+ "beam_size": 4,
20
+ "blayers": 2,
21
+ "bnmask": 2,
22
+ "bprojs": 300,
23
+ "btype": "blstmp",
24
+ "bunits": 300,
25
+ "cnn_module_kernel": 31,
26
+ "config2": null,
27
+ "config3": null,
28
+ "context_residual": false,
29
+ "criterion": "acc",
30
+ "ctc_type": "warpctc",
31
+ "ctc_weight": 0.3,
32
+ "debugmode": 1,
33
+ "dec_init": null,
34
+ "dec_init_mods": [
35
+ "att.",
36
+ " dec."
37
+ ],
38
+ "dict": "data/lang_1char/units.txt",
39
+ "dlayers": 6,
40
+ "dropout_rate": 0.1,
41
+ "dunits": 3072,
42
+ "early_stop_criterion": "validation/main/acc",
43
+ "elayers": 12,
44
+ "enc_init": null,
45
+ "enc_init_mods": [
46
+ "enc.enc."
47
+ ],
48
+ "eps": 1e-08,
49
+ "eps_decay": 0.01,
50
+ "eunits": 3072,
51
+ "fbank_fmax": null,
52
+ "fbank_fmin": 0.0,
53
+ "fbank_fs": 16000,
54
+ "grad_clip": 5.0,
55
+ "grad_noise": false,
56
+ "labels_type": "unigram5000",
57
+ "lm_weight": 0.1,
58
+ "lsm_weight": 0.1,
59
+ "macaron_style": 1,
60
+ "maxlen_in": 220,
61
+ "maxlen_out": 220,
62
+ "maxlenratio": 0.0,
63
+ "minibatches": 0,
64
+ "minlenratio": 0.0,
65
+ "model_module": "espnet.nets.pytorch_backend.e2e_asr_transformer_multitask_dual:E2E",
66
+ "mtl_custom_worker_l1_weight": 0.0,
67
+ "mtl_custom_worker_length_normalized_loss": 0,
68
+ "mtl_custom_worker_mlp_hdim": 256,
69
+ "mtl_custom_worker_mlp_nlayers": 2,
70
+ "mtl_custom_worker_mlp_nonlin_end": 0,
71
+ "mtl_custom_worker_mlp_nonlin_type": "relu",
72
+ "mtl_custom_worker_name": "patrickvonplaten/wav2vec2-base",
73
+ "mtl_custom_worker_task_type": "",
74
+ "mtl_custom_worker_tgt_type": "projected_quantized_states",
75
+ "mtl_kl_weight": 0.0,
76
+ "mtl_kl_weight_2": 0.0,
77
+ "mtl_l1_weight": 0.4,
78
+ "mtl_l1_weight_2": 0.4,
79
+ "mtl_length_normalized_loss": 1,
80
+ "mtl_length_normalized_loss_2": 1,
81
+ "mtl_mlp_hdim": 256,
82
+ "mtl_mlp_hdim_2": 256,
83
+ "mtl_mlp_nlayers": 1,
84
+ "mtl_mlp_nlayers_2": 1,
85
+ "mtl_mlp_nonlin_end": 0,
86
+ "mtl_mlp_nonlin_end_2": 0,
87
+ "mtl_mlp_nonlin_type": "relu",
88
+ "mtl_mlp_nonlin_type_2": "relu",
89
+ "mtl_task_layer": "conformer6",
90
+ "mtl_task_type": "l1",
91
+ "mtl_task_type_2": "l1",
92
+ "mtl_worker_source": "conv1d_lrs3_v04_lrs2",
93
+ "mtl_worker_source_2": "conv3d_lrs3_v04_lrs2_dual",
94
+ "mtlalpha": 0.1,
95
+ "n_iter_processes": 12,
96
+ "n_mels": 80,
97
+ "nbest": 1,
98
+ "ngpu": 1,
99
+ "num_encs": 1,
100
+ "num_input": 2,
101
+ "num_save_attention": 3,
102
+ "num_spkrs": 1,
103
+ "opt": "noam",
104
+ "patience": 0,
105
+ "penalty": 0.0,
106
+ "preprocess_conf": null,
107
+ "pretrain_dataset": "lrs2_full_dual_ignore",
108
+ "raw_max_freq_width": 150,
109
+ "raw_max_speed_rate": 1.1,
110
+ "raw_max_time_width": 0.4,
111
+ "raw_min_speed_rate": 0.9,
112
+ "raw_n_freq_mask": 2,
113
+ "raw_n_time_mask": 2,
114
+ "raw_speech_do_normalize": false,
115
+ "ref_channel": -1,
116
+ "rel_pos_type": "latest",
117
+ "relu_type": "swish",
118
+ "report_cer": false,
119
+ "report_interval_iters": 100,
120
+ "report_wer": false,
121
+ "rnnlm": null,
122
+ "rnnlm_conf": null,
123
+ "save_interval_iters": 0,
124
+ "seed": 1,
125
+ "sortagrad": 0,
126
+ "specaug_max_freq_width": 30,
127
+ "specaug_max_time_warp": 5,
128
+ "specaug_max_time_width": 40,
129
+ "specaug_n_freq_mask": 2,
130
+ "specaug_n_time_mask": 2,
131
+ "sr_interp_mode": "nearest",
132
+ "sr_interp_scale_factor": 1.0,
133
+ "stats_file": null,
134
+ "sym_blank": "<blank>",
135
+ "sym_space": "<space>",
136
+ "threshold": 0.0001,
137
+ "train_dtype": "float32",
138
+ "transformer_attn_dropout_rate": 0.1,
139
+ "transformer_encoder_attn_layer_type": "rel_mha",
140
+ "transformer_init": "pytorch",
141
+ "transformer_input_layer": "conv1d",
142
+ "transformer_length_normalized_loss": 0,
143
+ "transformer_warmup_steps": 25000,
144
+ "use_beamformer": true,
145
+ "use_cnn_module": 1,
146
+ "use_dnn_mask_for_wpe": false,
147
+ "use_freqmask": false,
148
+ "use_frontend": false,
149
+ "use_noiseaug": false,
150
+ "use_specaug": false,
151
+ "use_speedaug": false,
152
+ "use_timemask": false,
153
+ "use_v_adaptive_timemask": true,
154
+ "use_v_cutout": false,
155
+ "use_v_timemask": false,
156
+ "use_wpe": false,
157
+ "uttmvn_norm_means": true,
158
+ "uttmvn_norm_vars": false,
159
+ "v_cutout_max_hole_length": 22,
160
+ "v_cutout_n_holes": 1,
161
+ "v_raw_max_time_width": 0.4,
162
+ "v_raw_n_time_mask": 1,
163
+ "v_timemask_replace_with_zero": false,
164
+ "v_timemask_stride": 1.0,
165
+ "verbose": 0,
166
+ "wavaugments": null,
167
+ "wdropout_rate": 0.0,
168
+ "weight_decay": 0.0,
169
+ "wlayers": 2,
170
+ "wpe_delay": 3,
171
+ "wpe_taps": 5,
172
+ "wprojs": 300,
173
+ "wtype": "blstmp",
174
+ "wunits": 300,
175
+ "zero_triu": false
176
+ }
177
+ ]
benchmarks/LRS3/models/LRS3_A_WER1.0/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6443e830edbdd1886854ba64219b29430eca2ee055497ffc4f04419258ff3b2
3
+ size 972562947
benchmarks/LRS3/models/LRS3_V_WER19.1/model.json ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ 9216,
3
+ 41,
4
+ {
5
+ "a_upsample_ratio": 1,
6
+ "accum_grad": 2,
7
+ "adim": 768,
8
+ "aheads": 12,
9
+ "apply_uttmvn": true,
10
+ "aux_lsm_weight": 0.0,
11
+ "backend": "pytorch",
12
+ "badim": 320,
13
+ "batch_bins": 0,
14
+ "batch_count": "auto",
15
+ "batch_frames_in": 0,
16
+ "batch_frames_inout": 0,
17
+ "batch_frames_out": 0,
18
+ "bdropout_rate": 0.0,
19
+ "beam_size": 4,
20
+ "blayers": 2,
21
+ "bnmask": 2,
22
+ "bprojs": 300,
23
+ "btype": "blstmp",
24
+ "bunits": 300,
25
+ "cnn_module_kernel": 31,
26
+ "config2": null,
27
+ "config3": null,
28
+ "context_residual": false,
29
+ "criterion": "acc",
30
+ "ctc_type": "warpctc",
31
+ "ctc_weight": 0.3,
32
+ "debugmode": 1,
33
+ "dec_init": null,
34
+ "dec_init_mods": [
35
+ "att.",
36
+ " dec."
37
+ ],
38
+ "dict": "data/lang_1char/units.txt",
39
+ "dlayers": 6,
40
+ "dropout_rate": 0.1,
41
+ "dunits": 3072,
42
+ "early_stop_criterion": "validation/main/acc",
43
+ "elayers": 12,
44
+ "enc_init": null,
45
+ "enc_init_mods": [
46
+ "enc.enc."
47
+ ],
48
+ "eps": 1e-08,
49
+ "eps_decay": 0.01,
50
+ "eunits": 3072,
51
+ "fbank_fmax": null,
52
+ "fbank_fmin": 0.0,
53
+ "fbank_fs": 16000,
54
+ "grad_clip": 5.0,
55
+ "grad_noise": false,
56
+ "labels_type": "unigram5000",
57
+ "lm_weight": 0.1,
58
+ "lsm_weight": 0.1,
59
+ "macaron_style": 1,
60
+ "maxlen_in": 220,
61
+ "maxlen_out": 220,
62
+ "maxlenratio": 0.0,
63
+ "minibatches": 0,
64
+ "minlenratio": 0.0,
65
+ "model_module": "espnet.nets.pytorch_backend.e2e_asr_transformer_multitask_dual:E2E",
66
+ "mtl_custom_worker_l1_weight": 0.0,
67
+ "mtl_custom_worker_length_normalized_loss": 0,
68
+ "mtl_custom_worker_mlp_hdim": 256,
69
+ "mtl_custom_worker_mlp_nlayers": 2,
70
+ "mtl_custom_worker_mlp_nonlin_end": 0,
71
+ "mtl_custom_worker_mlp_nonlin_type": "relu",
72
+ "mtl_custom_worker_name": "patrickvonplaten/wav2vec2-base",
73
+ "mtl_custom_worker_task_type": "",
74
+ "mtl_custom_worker_tgt_type": "projected_quantized_states",
75
+ "mtl_kl_weight": 0.0,
76
+ "mtl_kl_weight_2": 0.0,
77
+ "mtl_l1_weight": 0.4,
78
+ "mtl_l1_weight_2": 0.4,
79
+ "mtl_length_normalized_loss": 1,
80
+ "mtl_length_normalized_loss_2": 1,
81
+ "mtl_mlp_hdim": 256,
82
+ "mtl_mlp_hdim_2": 256,
83
+ "mtl_mlp_nlayers": 1,
84
+ "mtl_mlp_nlayers_2": 1,
85
+ "mtl_mlp_nonlin_end": 0,
86
+ "mtl_mlp_nonlin_end_2": 0,
87
+ "mtl_mlp_nonlin_type": "relu",
88
+ "mtl_mlp_nonlin_type_2": "relu",
89
+ "mtl_task_layer": "conformer6",
90
+ "mtl_task_type": "l1",
91
+ "mtl_task_type_2": "l1",
92
+ "mtl_worker_source": "conv1d_lrs3_v04_lrs2",
93
+ "mtl_worker_source_2": "conv3d_lrs3_v04_lrs2_dual",
94
+ "mtlalpha": 0.1,
95
+ "n_iter_processes": 12,
96
+ "n_mels": 80,
97
+ "nbest": 1,
98
+ "ngpu": 1,
99
+ "num_encs": 1,
100
+ "num_input": 2,
101
+ "num_save_attention": 3,
102
+ "num_spkrs": 1,
103
+ "opt": "noam",
104
+ "patience": 0,
105
+ "penalty": 0.0,
106
+ "preprocess_conf": null,
107
+ "pretrain_dataset": "lrs2_full_dual_ignore",
108
+ "raw_max_freq_width": 150,
109
+ "raw_max_speed_rate": 1.1,
110
+ "raw_max_time_width": 0.4,
111
+ "raw_min_speed_rate": 0.9,
112
+ "raw_n_freq_mask": 2,
113
+ "raw_n_time_mask": 2,
114
+ "raw_speech_do_normalize": false,
115
+ "ref_channel": -1,
116
+ "rel_pos_type": "latest",
117
+ "relu_type": "swish",
118
+ "report_cer": false,
119
+ "report_interval_iters": 100,
120
+ "report_wer": false,
121
+ "rnnlm": null,
122
+ "rnnlm_conf": null,
123
+ "save_interval_iters": 0,
124
+ "seed": 1,
125
+ "sortagrad": 0,
126
+ "specaug_max_freq_width": 30,
127
+ "specaug_max_time_warp": 5,
128
+ "specaug_max_time_width": 40,
129
+ "specaug_n_freq_mask": 2,
130
+ "specaug_n_time_mask": 2,
131
+ "sr_interp_mode": "nearest",
132
+ "sr_interp_scale_factor": 1.0,
133
+ "stats_file": null,
134
+ "sym_blank": "<blank>",
135
+ "sym_space": "<space>",
136
+ "threshold": 0.0001,
137
+ "train_dtype": "float32",
138
+ "transformer_attn_dropout_rate": 0.1,
139
+ "transformer_encoder_attn_layer_type": "rel_mha",
140
+ "transformer_init": "pytorch",
141
+ "transformer_input_layer": "conv3d",
142
+ "transformer_length_normalized_loss": 0,
143
+ "transformer_warmup_steps": 25000,
144
+ "use_beamformer": true,
145
+ "use_cnn_module": 1,
146
+ "use_dnn_mask_for_wpe": false,
147
+ "use_freqmask": false,
148
+ "use_frontend": false,
149
+ "use_noiseaug": false,
150
+ "use_specaug": false,
151
+ "use_speedaug": false,
152
+ "use_timemask": false,
153
+ "use_v_adaptive_timemask": true,
154
+ "use_v_cutout": false,
155
+ "use_v_timemask": false,
156
+ "use_wpe": false,
157
+ "uttmvn_norm_means": true,
158
+ "uttmvn_norm_vars": false,
159
+ "v_cutout_max_hole_length": 22,
160
+ "v_cutout_n_holes": 1,
161
+ "v_raw_max_time_width": 0.4,
162
+ "v_raw_n_time_mask": 1,
163
+ "v_timemask_replace_with_zero": false,
164
+ "v_timemask_stride": 1.0,
165
+ "verbose": 0,
166
+ "wavaugments": null,
167
+ "wdropout_rate": 0.0,
168
+ "weight_decay": 0.0,
169
+ "wlayers": 2,
170
+ "wpe_delay": 3,
171
+ "wpe_taps": 5,
172
+ "wprojs": 300,
173
+ "wtype": "blstmp",
174
+ "wunits": 300,
175
+ "zero_triu": false
176
+ }
177
+ ]
benchmarks/LRS3/models/LRS3_V_WER19.1/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e740cef369abeabd0ba2c18e37a0661342e1d94d432d6caa77755a11821d8fe3
3
+ size 1001908942
benchmarks/LRS3/models/README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ Put model folders here.