Dandan0K committed on
Commit
12080b6
1 Parent(s): 81957f5

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -2,34 +2,28 @@
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
5
  *.ftz filter=lfs diff=lfs merge=lfs -text
6
  *.gz filter=lfs diff=lfs merge=lfs -text
7
  *.h5 filter=lfs diff=lfs merge=lfs -text
8
  *.joblib filter=lfs diff=lfs merge=lfs -text
9
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
10
  *.model filter=lfs diff=lfs merge=lfs -text
11
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
12
  *.onnx filter=lfs diff=lfs merge=lfs -text
13
  *.ot filter=lfs diff=lfs merge=lfs -text
14
  *.parquet filter=lfs diff=lfs merge=lfs -text
15
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
16
  *.pt filter=lfs diff=lfs merge=lfs -text
17
  *.pth filter=lfs diff=lfs merge=lfs -text
18
  *.rar filter=lfs diff=lfs merge=lfs -text
 
19
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
21
  *.tflite filter=lfs diff=lfs merge=lfs -text
22
  *.tgz filter=lfs diff=lfs merge=lfs -text
23
  *.wasm filter=lfs diff=lfs merge=lfs -text
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ transcriptions_cv7_test.json filter=lfs diff=lfs merge=lfs -text
29
+ transcriptions_cv7_validation.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - it
4
+ license: apache-2.0
5
+ tags:
6
+ - automatic-speech-recognition
7
+ - it
8
+ datasets:
9
+ - mozilla-foundation/common_voice_7_0
10
+ ---
11
+ # exp_w2v2t_it_vp-100k_s449
12
+
13
+ Fine-tuned [facebook/wav2vec2-large-100k-voxpopuli](https://huggingface.co/facebook/wav2vec2-large-100k-voxpopuli) for speech recognition using the train split of [Common Voice 7.0 (it)](https://huggingface.co/datasets/mozilla-foundation/common_voice_7_0).
14
+ When using this model, make sure that your speech input is sampled at 16 kHz.
15
+
16
+ This model was fine-tuned using the [HuggingSound](https://github.com/jonatasgrosman/huggingsound) tool.
17
+
alphabet.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"labels": ["", "<s>", "</s>", "\u2047", " ", "'", "-", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "\u00e0", "\u00e1", "\u00e8", "\u00e9", "\u00ec", "\u00ed", "\u00f2", "\u00f3", "\u00f9", "\u00fa", "\u010d", "\u014d", "\u0161"], "is_bpe": false}
config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-100k-voxpopuli",
3
+ "activation_dropout": 0.05,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.05,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "sum",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.05,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.05,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.05,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.05,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.05,
76
+ "mask_time_selection": "static",
77
+ "model_type": "wav2vec2",
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 16,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 24,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 1024,
88
+ "pad_token_id": 0,
89
+ "proj_codevector_dim": 768,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.15.0",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 46,
115
+ "xvector_output_dim": 512
116
+ }
language_model/2gram_It_Ref.arpa ADDED
@@ -0,0 +1,510 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \data\
2
+ ngram 1=244
3
+ ngram 2=257
4
+
5
+ \1-grams:
6
+ -2.4316506 <unk> 0
7
+ 0 <s> -0.30103
8
+ 0 </s> -0.30103
9
+ 0 </s> -0.30103
10
+ -2.4094923 </s> 0
11
+ -1.9548386 mela -1.5642715
12
+ -2.4094923 pomodoro -1.5642715
13
+ -2.4094923 mondo -1.5642715
14
+ -2.4094923 tentacolo -1.5642715
15
+ -2.4094923 maschera -1.5642715
16
+ -2.1308527 cartapesta -1.3222193
17
+ -2.4094923 gioco -1.5642715
18
+ -2.4094923 polipo -1.5642715
19
+ -2.4094923 cuore -1.5642715
20
+ -2.4094923 prezzemolo -1.5642715
21
+ -2.4094923 margherita -1.5642715
22
+ -2.4094923 girotondo -1.2632414
23
+ -2.1308527 rofe -1.6163005
24
+ -2.4094923 vufebu -1.6163005
25
+ -2.4094923 costro -1.6163005
26
+ -2.4094923 lada -1.6163005
27
+ -2.4094923 nurto -1.6163005
28
+ -2.4094923 frimace -1.6163005
29
+ -2.4094923 canfrosto -1.6163005
30
+ -2.4094923 presemma -1.6163005
31
+ -2.4094923 oblimione -1.6163005
32
+ -2.4094923 ceglirate -1.6163005
33
+ -2.4094923 banzione -1.6163005
34
+ -2.4094923 alcelisto -1.2483237
35
+ -1.7240447 vaso -0.8239087
36
+ -2.4094923 pace -0.8239087
37
+ -2.4094923 camera -0.8239087
38
+ -2.4094923 genitore -0.8239087
39
+ -2.4094923 mese -0.8239087
40
+ -2.4094923 cava -0.8239087
41
+ -2.4094923 parola -0.8239087
42
+ -2.4094923 bambina -0.8239087
43
+ -2.4094923 oggi -0.8239087
44
+ -2.4094923 foca -0.8239087
45
+ -2.4094923 aprile -0.8239087
46
+ -2.4094923 carnevale -0.69897
47
+ -2.1308527 duna -0.8239087
48
+ -2.4094923 nonno -0.8239087
49
+ -2.4094923 canile -0.8239087
50
+ -2.4094923 binocolo -0.8239087
51
+ -2.4094923 bacio -0.8239087
52
+ -2.4094923 alga -0.8239087
53
+ -2.4094923 codice -0.8239087
54
+ -2.4094923 bandito -0.8239087
55
+ -2.4094923 vizio -0.8239087
56
+ -2.4094923 matto -0.8239087
57
+ -2.4094923 avviso -0.8239087
58
+ -2.4094923 carnivoro -0.69897
59
+ -2.1308527 pranzo -0.8239087
60
+ -2.4094923 foglio -0.8239087
61
+ -2.4094923 sinistra -0.8239087
62
+ -2.4094923 bersaglio -0.8239087
63
+ -2.4094923 strano -0.8239087
64
+ -2.4094923 prezzo -0.8239087
65
+ -2.4094923 montagna -0.8239087
66
+ -2.4094923 mezzogiorno -0.8239087
67
+ -2.4094923 maestra -0.8239087
68
+ -2.4094923 piastra -0.8239087
69
+ -2.4094923 fratello -0.8239087
70
+ -2.4094923 pescespada -0.8239087
71
+ -2.4094923 strato -0.8239087
72
+ -2.4094923 balestra -0.8239087
73
+ -2.4094923 appuntamento -0.8239087
74
+ -2.4094923 capoclasse -0.8239087
75
+ -2.4094923 veglia -0.8239087
76
+ -2.4094923 risposta -0.8239087
77
+ -2.4094923 bicicletta -0.8239087
78
+ -2.4094923 caffetteria -0.8239087
79
+ -2.4094923 strizza -0.8239087
80
+ -2.4094923 cassetto -0.8239087
81
+ -2.4094923 cartellino -0.8239087
82
+ -2.4094923 insegnamento -0.45593196
83
+ -1.9548386 vilo -0.72699875
84
+ -2.4094923 soce -0.72699875
85
+ -2.4094923 gloveno -0.72699875
86
+ -2.4094923 munviglio -0.72699875
87
+ -2.4094923 depe -0.72699875
88
+ -2.4094923 banse -0.72699875
89
+ -2.4094923 coritta -0.72699875
90
+ -2.4094923 prodeglia -0.72699875
91
+ -2.4094923 arvi -0.72699875
92
+ -2.4094923 bignalo -0.72699875
93
+ -2.4094923 flunestro -0.72699875
94
+ -2.4094923 risognefa -0.72699875
95
+ -2.4094923 ostro -0.72699875
96
+ -2.4094923 bismaggo -0.72699875
97
+ -2.4094923 sirvelone -0.72699875
98
+ -2.4094923 chedinare -0.72699875
99
+ -2.4094923 dorca -0.72699875
100
+ -2.4094923 boleggio -0.72699875
101
+ -2.4094923 gagliralo -0.72699875
102
+ -2.4094923 nachipante -0.72699875
103
+ -2.4094923 nutto -0.72699875
104
+ -2.4094923 egnomisa -0.72699875
105
+ -2.4094923 coragresto -0.72699875
106
+ -2.4094923 lesciacope -0.35902193
107
+ -2.1308527 caso -0.60206
108
+ -2.4094923 fame -0.60206
109
+ -2.4094923 vicino -0.60206
110
+ -2.4094923 domenica -0.60206
111
+ -2.4094923 meno -0.60206
112
+ -2.4094923 bava -0.60206
113
+ -2.4094923 nuvola -0.60206
114
+ -2.4094923 signora -0.60206
115
+ -2.4094923 ecco -0.60206
116
+ -2.4094923 foga -0.60206
117
+ -2.4094923 estate -0.60206
118
+ -2.4094923 ospedale -0.60206
119
+ -2.4094923 diva -0.60206
120
+ -2.4094923 notte -0.60206
121
+ -2.4094923 barile -0.60206
122
+ -2.4094923 basilico -0.60206
123
+ -2.4094923 radio -0.60206
124
+ -2.4094923 anca -0.60206
125
+ -2.4094923 cofano -0.60206
126
+ -2.4094923 candito -0.60206
127
+ -2.4094923 bivio -0.60206
128
+ -2.4094923 fatto -0.60206
129
+ -2.4094923 attesa -0.60206
130
+ -2.4094923 candidato -0.47712123
131
+ -2.1308527 scarpa -0.60206
132
+ -2.4094923 figlio -0.60206
133
+ -2.4094923 palestra -0.60206
134
+ -2.4094923 fermaglio -0.60206
135
+ -2.4094923 strada -0.60206
136
+ -2.4094923 pressa -0.60206
137
+ -2.4094923 compagno -0.60206
138
+ -2.4094923 soggiorno -0.60206
139
+ -2.4094923 giostra -0.60206
140
+ -2.4094923 mostra -0.60206
141
+ -2.4094923 castello -0.60206
142
+ -2.4094923 cassaforte -0.60206
143
+ -2.4094923 strage -0.60206
144
+ -2.4094923 minestra -0.60206
145
+ -2.4094923 appartamento -0.60206
146
+ -2.4094923 caposcuola -0.60206
147
+ -2.4094923 taglia -0.60206
148
+ -2.4094923 farfalla -0.60206
149
+ -2.4094923 medaglietta -0.60206
150
+ -2.4094923 caffettiera -0.60206
151
+ -2.4094923 stringa -0.60206
152
+ -2.4094923 gessetto -0.60206
153
+ -2.4094923 cartoncino -0.60206
154
+ -2.4094923 arrossamento -0.47712123
155
+ -2.4094923 vuso -0.5228787
156
+ -2.4094923 seca -0.5228787
157
+ -2.4094923 glofeno -0.5228787
158
+ -2.4094923 ranviglio -0.5228787
159
+ -2.4094923 dape -0.5228787
160
+ -2.4094923 larse -0.5228787
161
+ -2.4094923 dorizza -0.5228787
162
+ -2.4094923 pradeglia -0.5228787
163
+ -2.4094923 arlo -0.5228787
164
+ -2.4094923 begnole -0.5228787
165
+ -2.4094923 flavestro -0.5228787
166
+ -2.4094923 risagnera -0.5228787
167
+ -2.4094923 istro -0.5228787
168
+ -2.4094923 bismoggo -0.5228787
169
+ -2.4094923 sirfelone -0.5228787
170
+ -2.4094923 chinadire -0.5228787
171
+ -2.4094923 borte -0.5228787
172
+ -2.4094923 foneggio -0.5228787
173
+ -2.4094923 caglivaro -0.5228787
174
+ -2.4094923 pachetenta -0.5228787
175
+ -2.4094923 bitto -0.5228787
176
+ -2.4094923 egnamisa -0.5228787
177
+ -2.4094923 caregresto -0.5228787
178
+ -2.4094923 vusciacope -0.30103
179
+ -1.8242877 naso -0.72699875
180
+ -2.4094923 pane -0.72699875
181
+ -2.4094923 lavoro -0.72699875
182
+ -2.4094923 telefono -0.72699875
183
+ -2.4094923 mano -0.72699875
184
+ -2.4094923 fava -0.72699875
185
+ -2.4094923 natura -0.72699875
186
+ -2.4094923 persona -0.72699875
187
+ -2.4094923 anno -0.72699875
188
+ -2.4094923 foce -0.72699875
189
+ -2.4094923 angolo -0.72699875
190
+ -2.4094923 personale -0.72699875
191
+ -2.4094923 diga -0.72699875
192
+ -2.4094923 gatto -0.72699875
193
+ -2.4094923 badile -0.72699875
194
+ -2.4094923 bonifico -0.72699875
195
+ -2.4094923 linea -0.72699875
196
+ -2.4094923 alba -0.72699875
197
+ -2.4094923 cometa -0.72699875
198
+ -2.4094923 pentito -0.72699875
199
+ -2.4094923 tizio -0.72699875
200
+ -2.4094923 patto -0.72699875
201
+ -2.4094923 offesa -0.72699875
202
+ -2.4094923 calderone -0.72699875
203
+ -2.4094923 frutta -0.72699875
204
+ -2.4094923 foglia -0.72699875
205
+ -2.4094923 finestra -0.72699875
206
+ -2.4094923 ventaglio -0.72699875
207
+ -2.4094923 strega -0.72699875
208
+ -2.4094923 pronto -0.72699875
209
+ -2.4094923 campagna -0.72699875
210
+ -2.4094923 buongiorno -0.72699875
211
+ -2.4094923 destra -0.72699875
212
+ -2.4094923 lastra -0.72699875
213
+ -2.4094923 cappello -0.72699875
214
+ -2.4094923 strofa -0.72699875
215
+ -2.4094923 ministra -0.72699875
216
+ -2.4094923 inquinamento -0.72699875
217
+ -2.4094923 caposquadra -0.72699875
218
+ -2.4094923 tiglio -0.72699875
219
+ -2.4094923 fantasma -0.72699875
220
+ -2.4094923 motocicletta -0.72699875
221
+ -2.4094923 caffelatte -0.72699875
222
+ -2.4094923 strillo -0.72699875
223
+ -2.4094923 rossetto -0.72699875
224
+ -2.4094923 cartellone -0.72699875
225
+ -2.4094923 sollevamento -0.72699875
226
+ -2.4094923 valo -0.72699875
227
+ -2.4094923 save -0.72699875
228
+ -2.4094923 glovelo -0.72699875
229
+ -2.4094923 sonviglio -0.72699875
230
+ -2.4094923 dete -0.72699875
231
+ -2.4094923 farde -0.72699875
232
+ -2.4094923 poritta -0.72699875
233
+ -2.4094923 prodiglia -0.72699875
234
+ -2.4094923 anci -0.72699875
235
+ -2.4094923 bignaso -0.72699875
236
+ -2.4094923 flenestro -0.72699875
237
+ -2.4094923 risugnela -0.72699875
238
+ -2.4094923 ustro -0.72699875
239
+ -2.4094923 bismacco -0.72699875
240
+ -2.4094923 sarvelone -0.72699875
241
+ -2.4094923 chetinere -0.72699875
242
+ -2.4094923 binca -0.72699875
243
+ -2.4094923 lonaggio -0.72699875
244
+ -2.4094923 cagliralo -0.72699875
245
+ -2.4094923 machidante -0.72699875
246
+ -2.4094923 tucca -0.72699875
247
+ -2.4094923 ignemisa -0.72699875
248
+ -2.4094923 coregresta -0.72699875
249
+ -2.4094923 nisciacope -0.42596874
250
+
251
+ \2-grams:
252
+ -1.1940873 nisciacope </s>
253
+ -0.29623765 <s> mela
254
+ -0.25558913 girotondo mela
255
+ -0.47981584 alcelisto mela
256
+ -0.011961477 mela pomodoro
257
+ -0.011961477 pomodoro mondo
258
+ -0.011961477 mondo tentacolo
259
+ -0.011961477 tentacolo maschera
260
+ -0.011918825 maschera cartapesta
261
+ -0.08943576 cappello cartapesta
262
+ -0.07089193 cartapesta gioco
263
+ -0.011961477 gioco polipo
264
+ -0.011961477 polipo cuore
265
+ -0.011961477 cuore prezzemolo
266
+ -0.011961477 prezzemolo margherita
267
+ -0.011961477 margherita girotondo
268
+ -0.4074761 girotondo rofe
269
+ -0.21806063 alcelisto rofe
270
+ -0.010594367 rofe vufebu
271
+ -0.010594367 vufebu costro
272
+ -0.010594367 costro lada
273
+ -0.010594367 lada nurto
274
+ -0.010594367 nurto frimace
275
+ -0.010594367 frimace canfrosto
276
+ -0.010594367 canfrosto presemma
277
+ -0.010594367 presemma oblimione
278
+ -0.010594367 oblimione ceglirate
279
+ -0.010594367 ceglirate banzione
280
+ -0.010594367 banzione alcelisto
281
+ -2.0395193 alcelisto vaso
282
+ -0.87530303 lesciacope vaso
283
+ -1.047566 candidato vaso
284
+ -0.96082795 vusciacope vaso
285
+ -0.3520508 nisciacope vaso
286
+ -0.07028266 vaso pace
287
+ -0.07028266 pace camera
288
+ -0.07028266 camera genitore
289
+ -0.07028266 genitore mese
290
+ -0.07028266 mese cava
291
+ -0.07028266 cava parola
292
+ -0.07028266 parola bambina
293
+ -0.07028266 bambina oggi
294
+ -0.07028266 oggi foca
295
+ -0.07028266 foca aprile
296
+ -0.07028266 aprile carnevale
297
+ -0.12408276 carnevale duna
298
+ -1.279101 insegnamento duna
299
+ -0.07028266 duna nonno
300
+ -0.07028266 nonno canile
301
+ -0.07028266 canile binocolo
302
+ -0.07028266 binocolo bacio
303
+ -0.07028266 bacio alga
304
+ -0.07028266 alga codice
305
+ -0.07028266 codice bandito
306
+ -0.07028266 bandito vizio
307
+ -0.07028266 vizio matto
308
+ -0.07028266 matto avviso
309
+ -0.07028266 avviso carnivoro
310
+ -1.2883639 carnevale pranzo
311
+ -0.12408276 carnivoro pranzo
312
+ -0.07028266 pranzo foglio
313
+ -0.07028266 foglio sinistra
314
+ -0.07028266 sinistra bersaglio
315
+ -0.07028266 bersaglio strano
316
+ -0.07028266 strano prezzo
317
+ -0.07028266 prezzo montagna
318
+ -0.07028266 montagna mezzogiorno
319
+ -0.07028266 mezzogiorno maestra
320
+ -0.07028266 maestra piastra
321
+ -0.07028266 piastra fratello
322
+ -0.07028266 fratello pescespada
323
+ -0.07028266 pescespada strato
324
+ -0.07028266 strato balestra
325
+ -0.07028266 balestra appuntamento
326
+ -0.07028266 appuntamento capoclasse
327
+ -0.07028266 capoclasse veglia
328
+ -0.07028266 veglia risposta
329
+ -0.07028266 risposta bicicletta
330
+ -0.07028266 bicicletta caffetteria
331
+ -0.07028266 caffetteria strizza
332
+ -0.07028266 strizza cassetto
333
+ -0.07028266 cassetto cartellino
334
+ -0.07028266 cartellino insegnamento
335
+ -1.28217 carnivoro vilo
336
+ -0.34305558 insegnamento vilo
337
+ -1.0603212 arrossamento vilo
338
+ -0.08978643 vilo soce
339
+ -0.08978643 soce gloveno
340
+ -0.08978643 gloveno munviglio
341
+ -0.08978643 munviglio depe
342
+ -0.08978643 depe banse
343
+ -0.08978643 banse coritta
344
+ -0.08978643 coritta prodeglia
345
+ -0.08978643 prodeglia arvi
346
+ -0.08978643 arvi bignalo
347
+ -0.08978643 bignalo flunestro
348
+ -0.08978643 flunestro risognefa
349
+ -0.08978643 risognefa ostro
350
+ -0.08978643 ostro bismaggo
351
+ -0.08978643 bismaggo sirvelone
352
+ -0.08978643 sirvelone chedinare
353
+ -0.08978643 chedinare dorca
354
+ -0.08978643 dorca boleggio
355
+ -0.08978643 boleggio gagliralo
356
+ -0.08978643 gagliralo nachipante
357
+ -0.08978643 nachipante nutto
358
+ -0.08978643 nutto egnomisa
359
+ -0.08978643 egnomisa coragresto
360
+ -0.08978643 coragresto lesciacope
361
+ -0.5006747 lesciacope caso
362
+ -0.6910105 vusciacope caso
363
+ -0.12437523 caso fame
364
+ -0.12437523 fame vicino
365
+ -0.12437523 vicino domenica
366
+ -0.12437523 domenica meno
367
+ -0.12437523 meno bava
368
+ -0.12437523 bava nuvola
369
+ -0.12437523 nuvola signora
370
+ -0.12437523 signora ecco
371
+ -0.12437523 ecco foga
372
+ -0.12437523 foga estate
373
+ -0.12437523 estate ospedale
374
+ -0.12437523 ospedale diva
375
+ -0.12437523 diva notte
376
+ -0.12437523 notte barile
377
+ -0.12437523 barile basilico
378
+ -0.12437523 basilico radio
379
+ -0.12437523 radio anca
380
+ -0.12437523 anca cofano
381
+ -0.12437523 cofano candito
382
+ -0.12437523 candito bivio
383
+ -0.12437523 bivio fatto
384
+ -0.12437523 fatto attesa
385
+ -0.12437523 attesa candidato
386
+ -1.279101 insegnamento scarpa
387
+ -0.232251 candidato scarpa
388
+ -0.12437523 scarpa figlio
389
+ -0.12437523 figlio palestra
390
+ -0.12437523 palestra fermaglio
391
+ -0.12437523 fermaglio strada
392
+ -0.12437523 strada pressa
393
+ -0.12437523 pressa compagno
394
+ -0.12437523 compagno soggiorno
395
+ -0.12437523 soggiorno giostra
396
+ -0.12437523 giostra mostra
397
+ -0.12437523 mostra castello
398
+ -0.12437523 castello cassaforte
399
+ -0.12437523 cassaforte strage
400
+ -0.12437523 strage minestra
401
+ -0.12437523 minestra appartamento
402
+ -0.12437523 appartamento caposcuola
403
+ -0.12437523 caposcuola taglia
404
+ -0.12437523 taglia farfalla
405
+ -0.12437523 farfalla medaglietta
406
+ -0.12437523 medaglietta caffettiera
407
+ -0.12437523 caffettiera stringa
408
+ -0.12437523 stringa gessetto
409
+ -0.12437523 gessetto cartoncino
410
+ -0.12437523 cartoncino arrossamento
411
+ -0.2331177 arrossamento vuso
412
+ -0.15417762 vuso seca
413
+ -0.15417762 seca glofeno
414
+ -0.15417762 glofeno ranviglio
415
+ -0.15417762 ranviglio dape
416
+ -0.15417762 dape larse
417
+ -0.15417762 larse dorizza
418
+ -0.15417762 dorizza pradeglia
419
+ -0.15417762 pradeglia arlo
420
+ -0.15417762 arlo begnole
421
+ -0.15417762 begnole flavestro
422
+ -0.15417762 flavestro risagnera
423
+ -0.15417762 risagnera istro
424
+ -0.15417762 istro bismoggo
425
+ -0.15417762 bismoggo sirfelone
426
+ -0.15417762 sirfelone chinadire
427
+ -0.15417762 chinadire borte
428
+ -0.15417762 borte foneggio
429
+ -0.15417762 foneggio caglivaro
430
+ -0.15417762 caglivaro pachetenta
431
+ -0.15417762 pachetenta bitto
432
+ -0.15417762 bitto egnamisa
433
+ -0.15417762 egnamisa caregresto
434
+ -0.15417762 caregresto vusciacope
435
+ -0.9777968 insegnamento naso
436
+ -0.8808868 lesciacope naso
437
+ -0.68299556 vusciacope naso
438
+ -0.88399005 nisciacope naso
439
+ -0.08978643 naso pane
440
+ -0.08978643 pane lavoro
441
+ -0.08978643 lavoro telefono
442
+ -0.08978643 telefono mano
443
+ -0.08978643 mano fava
444
+ -0.08978643 fava natura
445
+ -0.08978643 natura persona
446
+ -0.08978643 persona anno
447
+ -0.08978643 anno foce
448
+ -0.08978643 foce angolo
449
+ -0.08978643 angolo personale
450
+ -0.08978643 personale diga
451
+ -0.08978643 diga gatto
452
+ -0.08978643 gatto badile
453
+ -0.08978643 badile bonifico
454
+ -0.08978643 bonifico linea
455
+ -0.08978643 linea alba
456
+ -0.08978643 alba cometa
457
+ -0.08978643 cometa pentito
458
+ -0.08978643 pentito tizio
459
+ -0.08978643 tizio patto
460
+ -0.08978643 patto offesa
461
+ -0.08978643 offesa calderone
462
+ -0.08978643 calderone frutta
463
+ -0.08978643 frutta foglia
464
+ -0.08978643 foglia finestra
465
+ -0.08978643 finestra ventaglio
466
+ -0.08978643 ventaglio strega
467
+ -0.08978643 strega pronto
468
+ -0.08978643 pronto campagna
469
+ -0.08978643 campagna buongiorno
470
+ -0.08978643 buongiorno destra
471
+ -0.08978643 destra lastra
472
+ -0.08978643 lastra cappello
473
+ -0.98564714 cartapesta strofa
474
+ -0.08978643 strofa ministra
475
+ -0.08978643 ministra inquinamento
476
+ -0.08978643 inquinamento caposquadra
477
+ -0.08978643 caposquadra tiglio
478
+ -0.08978643 tiglio fantasma
479
+ -0.08978643 fantasma motocicletta
480
+ -0.08978643 motocicletta caffelatte
481
+ -0.08978643 caffelatte strillo
482
+ -0.08978643 strillo rossetto
483
+ -0.08978643 rossetto cartellone
484
+ -0.08978643 cartellone sollevamento
485
+ -0.08978643 sollevamento valo
486
+ -0.08978643 valo save
487
+ -0.08978643 save glovelo
488
+ -0.08978643 glovelo sonviglio
489
+ -0.08978643 sonviglio dete
490
+ -0.08978643 dete farde
491
+ -0.08978643 farde poritta
492
+ -0.08978643 poritta prodiglia
493
+ -0.08978643 prodiglia anci
494
+ -0.08978643 anci bignaso
495
+ -0.08978643 bignaso flenestro
496
+ -0.08978643 flenestro risugnela
497
+ -0.08978643 risugnela ustro
498
+ -0.08978643 ustro bismacco
499
+ -0.08978643 bismacco sarvelone
500
+ -0.08978643 sarvelone chetinere
501
+ -0.08978643 chetinere binca
502
+ -0.08978643 binca lonaggio
503
+ -0.08978643 lonaggio cagliralo
504
+ -0.08978643 cagliralo machidante
505
+ -0.08978643 machidante tucca
506
+ -0.08978643 tucca ignemisa
507
+ -0.08978643 ignemisa coregresta
508
+ -0.08978643 coregresta nisciacope
509
+
510
+ \end\
language_model/2gram_It_Ref.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f498476811af80757fe360dcae5de0ab06fa73fdec859cbc4e824160129e3540
3
+ size 13061
language_model/attrs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
language_model/unigrams.txt ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ </s>
2
+ <s>
3
+ alba
4
+ alcelisto
5
+ alga
6
+ anca
7
+ anci
8
+ angolo
9
+ anno
10
+ appartamento
11
+ appuntamento
12
+ aprile
13
+ arlo
14
+ arrossamento
15
+ arvi
16
+ attesa
17
+ avviso
18
+ bacio
19
+ badile
20
+ balestra
21
+ bambina
22
+ bandito
23
+ banse
24
+ banzione
25
+ barile
26
+ basilico
27
+ bava
28
+ begnole
29
+ bersaglio
30
+ bicicletta
31
+ bignalo
32
+ bignaso
33
+ binca
34
+ binocolo
35
+ bismacco
36
+ bismaggo
37
+ bismoggo
38
+ bitto
39
+ bivio
40
+ boleggio
41
+ bonifico
42
+ borte
43
+ buongiorno
44
+ caffelatte
45
+ caffetteria
46
+ caffettiera
47
+ cagliralo
48
+ caglivaro
49
+ calderone
50
+ camera
51
+ campagna
52
+ candidato
53
+ candito
54
+ canfrosto
55
+ canile
56
+ capoclasse
57
+ caposcuola
58
+ caposquadra
59
+ cappello
60
+ caregresto
61
+ carnevale
62
+ carnivoro
63
+ cartapesta
64
+ cartellino
65
+ cartellone
66
+ cartoncino
67
+ caso
68
+ cassaforte
69
+ cassetto
70
+ castello
71
+ cava
72
+ ceglirate
73
+ chedinare
74
+ chetinere
75
+ chinadire
76
+ codice
77
+ cofano
78
+ cometa
79
+ compagno
80
+ coragresto
81
+ coregresta
82
+ coritta
83
+ costro
84
+ cuore
85
+ dape
86
+ depe
87
+ destra
88
+ dete
89
+ diga
90
+ diva
91
+ domenica
92
+ dorca
93
+ dorizza
94
+ duna
95
+ ecco
96
+ egnamisa
97
+ egnomisa
98
+ estate
99
+ fame
100
+ fantasma
101
+ farde
102
+ farfalla
103
+ fatto
104
+ fava
105
+ fermaglio
106
+ figlio
107
+ finestra
108
+ flavestro
109
+ flenestro
110
+ flunestro
111
+ foca
112
+ foce
113
+ foga
114
+ foglia
115
+ foglio
116
+ foneggio
117
+ fratello
118
+ frimace
119
+ frutta
120
+ gagliralo
121
+ gatto
122
+ genitore
123
+ gessetto
124
+ gioco
125
+ giostra
126
+ girotondo
127
+ glofeno
128
+ glovelo
129
+ gloveno
130
+ ignemisa
131
+ inquinamento
132
+ insegnamento
133
+ istro
134
+ lada
135
+ larse
136
+ lastra
137
+ lavoro
138
+ lesciacope
139
+ linea
140
+ lonaggio
141
+ machidante
142
+ maestra
143
+ mano
144
+ margherita
145
+ maschera
146
+ matto
147
+ medaglietta
148
+ mela
149
+ meno
150
+ mese
151
+ mezzogiorno
152
+ minestra
153
+ ministra
154
+ mondo
155
+ montagna
156
+ mostra
157
+ motocicletta
158
+ munviglio
159
+ nachipante
160
+ naso
161
+ natura
162
+ nisciacope
163
+ nonno
164
+ notte
165
+ nurto
166
+ nutto
167
+ nuvola
168
+ oblimione
169
+ offesa
170
+ oggi
171
+ ospedale
172
+ ostro
173
+ pace
174
+ pachetenta
175
+ palestra
176
+ pane
177
+ parola
178
+ patto
179
+ pentito
180
+ persona
181
+ personale
182
+ pescespada
183
+ piastra
184
+ polipo
185
+ pomodoro
186
+ poritta
187
+ pradeglia
188
+ pranzo
189
+ presemma
190
+ pressa
191
+ prezzemolo
192
+ prezzo
193
+ prodeglia
194
+ prodiglia
195
+ pronto
196
+ radio
197
+ ranviglio
198
+ risagnera
199
+ risognefa
200
+ risposta
201
+ risugnela
202
+ rofe
203
+ rossetto
204
+ sarvelone
205
+ save
206
+ scarpa
207
+ seca
208
+ signora
209
+ sinistra
210
+ sirfelone
211
+ sirvelone
212
+ soce
213
+ soggiorno
214
+ sollevamento
215
+ sonviglio
216
+ strada
217
+ strage
218
+ strano
219
+ strato
220
+ strega
221
+ strillo
222
+ stringa
223
+ strizza
224
+ strofa
225
+ taglia
226
+ telefono
227
+ tentacolo
228
+ tiglio
229
+ tizio
230
+ tucca
231
+ ustro
232
+ valo
233
+ vaso
234
+ veglia
235
+ ventaglio
236
+ vicino
237
+ vilo
238
+ vizio
239
+ vufebu
240
+ vusciacope
241
+ vuso
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7281f836a6c24a0cb01dcd6abcaccc1d05af48807a5138b3f39c235904e0d7f
3
+ size 1262112241
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": true,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": true,
30
+ "normalized": false,
31
+ "rstrip": true,
32
+ "single_word": false,
33
+ "special": false
34
+ }
35
+ },
36
+ "bos_token": "<s>",
37
+ "clean_up_tokenization_spaces": true,
38
+ "do_lower_case": false,
39
+ "eos_token": "</s>",
40
+ "model_max_length": 1000000000000000019884624838656,
41
+ "pad_token": "<pad>",
42
+ "processor_class": "Wav2Vec2ProcessorWithLM",
43
+ "replace_word_delimiter_char": " ",
44
+ "target_lang": null,
45
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
+ "unk_token": "<unk>",
47
+ "word_delimiter_token": "|"
48
+ }
transcriptions_cv7_test.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e7ccd1835ec95183cc10c41dddf4532f3700c79d1d9979197242a697c24ce54
3
+ size 78764122
transcriptions_cv7_validation.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13ec28e9aa51878172c99ac03e1d68611f1a37bc6acd12fa96b22a2f474e1a3c
3
+ size 78184898
vocab.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "'": 5,
3
+ "-": 6,
4
+ "</s>": 2,
5
+ "<pad>": 0,
6
+ "<s>": 1,
7
+ "<unk>": 3,
8
+ "a": 7,
9
+ "b": 8,
10
+ "c": 9,
11
+ "d": 10,
12
+ "e": 11,
13
+ "f": 12,
14
+ "g": 13,
15
+ "h": 14,
16
+ "i": 15,
17
+ "j": 16,
18
+ "k": 17,
19
+ "l": 18,
20
+ "m": 19,
21
+ "n": 20,
22
+ "o": 21,
23
+ "p": 22,
24
+ "q": 23,
25
+ "r": 24,
26
+ "s": 25,
27
+ "t": 26,
28
+ "u": 27,
29
+ "v": 28,
30
+ "w": 29,
31
+ "x": 30,
32
+ "y": 31,
33
+ "z": 32,
34
+ "|": 4,
35
+ "à": 33,
36
+ "á": 34,
37
+ "è": 35,
38
+ "é": 36,
39
+ "ì": 37,
40
+ "í": 38,
41
+ "ò": 39,
42
+ "ó": 40,
43
+ "ù": 41,
44
+ "ú": 42,
45
+ "č": 43,
46
+ "ō": 44,
47
+ "š": 45
48
+ }