Dandan0K committed on
Commit
0608e53
1 Parent(s): 59864e2

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -2,34 +2,28 @@
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
5
  *.ftz filter=lfs diff=lfs merge=lfs -text
6
  *.gz filter=lfs diff=lfs merge=lfs -text
7
  *.h5 filter=lfs diff=lfs merge=lfs -text
8
  *.joblib filter=lfs diff=lfs merge=lfs -text
9
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
10
  *.model filter=lfs diff=lfs merge=lfs -text
11
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
12
  *.onnx filter=lfs diff=lfs merge=lfs -text
13
  *.ot filter=lfs diff=lfs merge=lfs -text
14
  *.parquet filter=lfs diff=lfs merge=lfs -text
15
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
16
  *.pt filter=lfs diff=lfs merge=lfs -text
17
  *.pth filter=lfs diff=lfs merge=lfs -text
18
  *.rar filter=lfs diff=lfs merge=lfs -text
 
19
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
21
  *.tflite filter=lfs diff=lfs merge=lfs -text
22
  *.tgz filter=lfs diff=lfs merge=lfs -text
23
  *.wasm filter=lfs diff=lfs merge=lfs -text
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ transcriptions_cv7_test.json filter=lfs diff=lfs merge=lfs -text
29
+ transcriptions_cv7_validation.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - it
4
+ license: apache-2.0
5
+ tags:
6
+ - automatic-speech-recognition
7
+ - it
8
+ datasets:
9
+ - mozilla-foundation/common_voice_7_0
10
+ ---
11
+ # exp_w2v2t_it_vp-100k_s449
12
+
13
+ Fine-tuned [facebook/wav2vec2-large-100k-voxpopuli](https://huggingface.co/facebook/wav2vec2-large-100k-voxpopuli) for speech recognition using the train split of [Common Voice 7.0 (it)](https://huggingface.co/datasets/mozilla-foundation/common_voice_7_0).
14
+ When using this model, make sure that your speech input is sampled at 16kHz.
15
+
16
+ This model has been fine-tuned by the [HuggingSound](https://github.com/jonatasgrosman/huggingsound) tool.
17
+
alphabet.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"labels": ["", "<s>", "</s>", "\u2047", " ", "'", "-", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "\u00e0", "\u00e1", "\u00e8", "\u00e9", "\u00ec", "\u00ed", "\u00f2", "\u00f3", "\u00f9", "\u00fa", "\u010d", "\u014d", "\u0161"], "is_bpe": false}
config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-100k-voxpopuli",
3
+ "activation_dropout": 0.05,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.05,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "sum",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.05,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.05,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.05,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.05,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.05,
76
+ "mask_time_selection": "static",
77
+ "model_type": "wav2vec2",
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 16,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 24,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 1024,
88
+ "pad_token_id": 0,
89
+ "proj_codevector_dim": 768,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.15.0",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 46,
115
+ "xvector_output_dim": 512
116
+ }
language_model/2gram_It_Hum_no_df1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c02e105347e415ba1e0761f628957fac2178f2d4541bf986b061464382fcd42
3
+ size 51053
language_model/attrs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
language_model/unigrams.txt ADDED
@@ -0,0 +1,757 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ </s>
2
+ <s>
3
+ a
4
+ ablalisto
5
+ ablelisto
6
+ ablimione
7
+ acco
8
+ aec
9
+ aestrada
10
+ agnamisa
11
+ al
12
+ alba
13
+ alc
14
+ alce
15
+ alceli
16
+ alcelis
17
+ alcelisisisotodoto
18
+ alcelista
19
+ alcelisto
20
+ alcelsivi
21
+ alcesti
22
+ alcestio
23
+ alcestito
24
+ alcezio
25
+ alchelisto
26
+ alecisto
27
+ alga
28
+ ananca
29
+ anca
30
+ anci
31
+ angelo
32
+ angola
33
+ angolo
34
+ anno
35
+ annocole
36
+ appartamento
37
+ appertamento
38
+ apportamento
39
+ appu
40
+ appunta
41
+ appuntamento
42
+ aprile
43
+ ar
44
+ arlo
45
+ arni
46
+ arrosamento
47
+ arrossa
48
+ arrossamento
49
+ arvi
50
+ aspedale
51
+ astate
52
+ astro
53
+ attesa
54
+ avviso
55
+ b
56
+ ba
57
+ babbile
58
+ bacio
59
+ badile
60
+ bale
61
+ bales
62
+ balestra
63
+ bambina
64
+ bambino
65
+ ban
66
+ bandito
67
+ banse
68
+ banzione
69
+ barc
70
+ baril
71
+ barile
72
+ basilico
73
+ bava
74
+ bavo
75
+ be
76
+ begnole
77
+ bepe
78
+ ber
79
+ bersa
80
+ bersaglio
81
+ bestra
82
+ bi
83
+ bia
84
+ bian
85
+ bianca
86
+ bicicletta
87
+ bigliaso
88
+ bignalo
89
+ bignas
90
+ bignaso
91
+ bignolia
92
+ bin
93
+ binboggio
94
+ binca
95
+ bincolo
96
+ binoccolo
97
+ binocolo
98
+ bis
99
+ bismacco
100
+ bismaco
101
+ bismag
102
+ bismaggo
103
+ bismago
104
+ bismo
105
+ bismoggo
106
+ bitto
107
+ bivio
108
+ bo
109
+ bodifi
110
+ bodifico
111
+ bole
112
+ boleggio
113
+ bolifi
114
+ bolifico
115
+ boni
116
+ bonifico
117
+ bor
118
+ borte
119
+ bu
120
+ bufebbu
121
+ bufebe
122
+ bufebu
123
+ buna
124
+ buongiorno
125
+ c
126
+ ca
127
+ cafelat
128
+ caff?
129
+ caffattiera
130
+ caffel
131
+ caffelatte
132
+ caffetteria
133
+ caffettiera
134
+ caglia
135
+ cagliavaro
136
+ cagliralo
137
+ caglirano
138
+ caglirilo
139
+ caglivaro
140
+ calderone
141
+ caliralo
142
+ callivaro
143
+ camer
144
+ camera
145
+ cammera
146
+ campagna
147
+ camposquadra
148
+ can
149
+ candidato
150
+ candito
151
+ canenfrosto
152
+ canentrosto
153
+ canf
154
+ canfo
155
+ canfosto
156
+ canfostro
157
+ canfro
158
+ canfronsto
159
+ canfrosto
160
+ canfrostro
161
+ caniele
162
+ canile
163
+ canindato
164
+ cannefrosto
165
+ cantapesta
166
+ cantello
167
+ capoadra
168
+ capocla
169
+ capoclass
170
+ capoclasse
171
+ capoquardra
172
+ caposcuola
173
+ caposquadra
174
+ caposquodra
175
+ cappello
176
+ capuscola
177
+ car
178
+ care
179
+ caregresto
180
+ cariereta
181
+ carletino
182
+ carli
183
+ carnevale
184
+ carnivoro
185
+ cart
186
+ cartapesta
187
+ cartegresto
188
+ cartellino
189
+ cartello
190
+ cartellone
191
+ cartepes
192
+ cartoncino
193
+ carvelale
194
+ cas
195
+ casa
196
+ casatello
197
+ caso
198
+ cassaforte
199
+ casse
200
+ cassetto
201
+ castello
202
+ cava
203
+ ce
204
+ cedicare
205
+ cegli
206
+ cegliar
207
+ cegliarate
208
+ cegliarte
209
+ cegligrate
210
+ ceglira
211
+ ceglirate
212
+ ceglireta
213
+ celgliarate
214
+ celia
215
+ cellirate
216
+ cellire
217
+ cen
218
+ chedi
219
+ chedimare
220
+ chedinare
221
+ chegliare
222
+ cheglirate
223
+ cher
224
+ chetinere
225
+ chettinere
226
+ chevin
227
+ chia
228
+ chinadire
229
+ chindiare
230
+ ciglilate
231
+ cioco
232
+ co
233
+ codi
234
+ codice
235
+ cofa
236
+ cofano
237
+ cofe
238
+ coffetteria
239
+ coglieralo
240
+ col
241
+ colderone
242
+ colto
243
+ comenta
244
+ cometa
245
+ compagna
246
+ compagno
247
+ compelo
248
+ con
249
+ condidato
250
+ condito
251
+ confettiera
252
+ confi
253
+ confrosto
254
+ contanpesta
255
+ coppe
256
+ cor
257
+ cora
258
+ corageso
259
+ coragnesto
260
+ coragresto
261
+ core
262
+ coregesta
263
+ coregnesta
264
+ coregresta
265
+ coregresto
266
+ coritta
267
+ cornevale
268
+ corrita
269
+ cortapesta
270
+ corvegresta
271
+ costro
272
+ cotro
273
+ cuore
274
+ custro
275
+ da
276
+ dabe
277
+ dabile
278
+ dadile
279
+ dales
280
+ dandi
281
+ danzione
282
+ dape
283
+ das
284
+ dasilico
285
+ dava
286
+ dazione
287
+ de
288
+ degnole
289
+ depe
290
+ dersagl
291
+ destr
292
+ destra
293
+ dete
294
+ dette
295
+ di
296
+ dici
297
+ dicicletta
298
+ diga
299
+ dignaso
300
+ dilo
301
+ dis
302
+ dismaggo
303
+ dismoggo
304
+ dismogo
305
+ diva
306
+ divio
307
+ do
308
+ domenica
309
+ donifico
310
+ dor
311
+ dorca
312
+ dorizza
313
+ du
314
+ duna
315
+ e
316
+ ec
317
+ ecco
318
+ egnamisa
319
+ egnomisa
320
+ egnomista
321
+ elce
322
+ esetate
323
+ espedale
324
+ espegale
325
+ estate
326
+ etteza
327
+ f
328
+ fa
329
+ fafa
330
+ faga
331
+ fal
332
+ fallo
333
+ fame
334
+ fantasma
335
+ far
336
+ fard
337
+ farde
338
+ farfalla
339
+ farfalle
340
+ farmacio
341
+ fatto
342
+ fav
343
+ fava
344
+ felmaglio
345
+ feressa
346
+ fermaglio
347
+ fevubo
348
+ ff
349
+ fff
350
+ fi
351
+ figl
352
+ figlio
353
+ fine
354
+ finestra
355
+ finistra
356
+ fl
357
+ flavestro
358
+ flenastro
359
+ flene
360
+ flenestro
361
+ flenetrego
362
+ flenstro
363
+ flu
364
+ flunestro
365
+ flustro
366
+ fo
367
+ foca
368
+ foce
369
+ foga
370
+ foglia
371
+ foglio
372
+ fonagio
373
+ foneggio
374
+ fore
375
+ forfalle
376
+ fr
377
+ frate
378
+ fratello
379
+ fresemma
380
+ fri
381
+ friermace
382
+ frima
383
+ frimace
384
+ frimache
385
+ frimasce
386
+ frimece
387
+ frimoce
388
+ frostro
389
+ fru
390
+ frutta
391
+ fu
392
+ fube
393
+ fugiaco
394
+ fupebo
395
+ fuvebu
396
+ ga
397
+ gadliralo
398
+ gagliralo
399
+ gairalo
400
+ gal
401
+ galgi
402
+ galgi?
403
+ galialo
404
+ gatto
405
+ ge
406
+ genitore
407
+ gessetto
408
+ gi
409
+ gi?
410
+ gin
411
+ gio
412
+ gioco
413
+ gior
414
+ giossetto
415
+ giostra
416
+ girondolo
417
+ giroton
418
+ girotondo
419
+ girotonondo
420
+ giustra
421
+ glio
422
+ glioveglio
423
+ gliovelo
424
+ glirate
425
+ glofano
426
+ glofeno
427
+ glove
428
+ glovello
429
+ glovelo
430
+ gloveno
431
+ gloverno
432
+ gnalo
433
+ go
434
+ gresto
435
+ grimace
436
+ ignemisa
437
+ ilnegisa
438
+ inlesa
439
+ inquinamento
440
+ insegnamento
441
+ insegnamentorofe
442
+ insegne
443
+ iovelo
444
+ isegnmisa
445
+ istro
446
+ isvelone
447
+ la
448
+ laba
449
+ lad
450
+ lada
451
+ lana
452
+ lars
453
+ larse
454
+ lastra
455
+ lavoro
456
+ le
457
+ les
458
+ lesciacode
459
+ lesciacope
460
+ linea
461
+ listo
462
+ lo
463
+ loanovarro
464
+ loba
465
+ lonacio
466
+ lonaggio
467
+ lonagio
468
+ longio
469
+ lorse
470
+ lu
471
+ lurto
472
+ m
473
+ ma
474
+ macchidante
475
+ machi
476
+ machidante
477
+ maestra
478
+ mano
479
+ mar
480
+ marchi
481
+ marg
482
+ marghe
483
+ margherita
484
+ marghrerita
485
+ mas
486
+ mascere
487
+ maschera
488
+ mascheri
489
+ mase
490
+ matta
491
+ matto
492
+ me
493
+ medaglia
494
+ medaglietta
495
+ medegliatta
496
+ mela
497
+ meno
498
+ meschera
499
+ mese
500
+ mezza
501
+ mezzogior
502
+ mezzogiorno
503
+ mi
504
+ mina
505
+ mine
506
+ minestra
507
+ mini
508
+ ministra
509
+ minitra
510
+ mis
511
+ mo
512
+ mondo
513
+ monondo
514
+ montagna
515
+ mostra
516
+ motocicletta
517
+ munviglio
518
+ munviio
519
+ muviglio
520
+ na
521
+ nachipante
522
+ naso
523
+ natura
524
+ nisciacope
525
+ nisciocope
526
+ no
527
+ nocipante
528
+ nonna
529
+ nonno
530
+ norci
531
+ notte
532
+ nu
533
+ nudo
534
+ nurto
535
+ nutro
536
+ nutto
537
+ nuvola
538
+ o
539
+ obblimione
540
+ obli
541
+ oblibione
542
+ oblie
543
+ oblimi
544
+ oblimione
545
+ oblimo
546
+ offesa
547
+ oggi
548
+ ognamise
549
+ oignomisa
550
+ olblimione
551
+ oli
552
+ olimione
553
+ olimo
554
+ oppinione
555
+ oppuntame
556
+ orlo
557
+ orsamento
558
+ ospedale
559
+ ostro
560
+ p
561
+ pa
562
+ pace
563
+ pachetenta
564
+ pales
565
+ palestra
566
+ pane
567
+ par
568
+ parola
569
+ pasce
570
+ patto
571
+ pe
572
+ pelipo
573
+ pen
574
+ pentito
575
+ per
576
+ pers
577
+ persemma
578
+ perso
579
+ persona
580
+ personale
581
+ perzzemolo
582
+ pesc
583
+ pesce
584
+ pescespada
585
+ pesche
586
+ pez
587
+ piastra
588
+ pista
589
+ po
590
+ pochetaenta
591
+ pochetenta
592
+ poli
593
+ polino
594
+ polipo
595
+ pomo
596
+ pomodoro
597
+ por
598
+ porita
599
+ poritta
600
+ potto
601
+ pradeglia
602
+ pran
603
+ pranzo
604
+ pre
605
+ predeglia
606
+ premme
607
+ premmes
608
+ premmesa
609
+ presa
610
+ presamma
611
+ prese
612
+ presemma
613
+ preveva
614
+ prezze
615
+ prezzemolo
616
+ prezzo
617
+ probeglia
618
+ prodeglia
619
+ prodeia
620
+ prodiglia
621
+ pronto
622
+ puntamento
623
+ radio
624
+ ranviglio
625
+ ranvioglio
626
+ raviglio
627
+ ri
628
+ riposta
629
+ ris
630
+ risagnera
631
+ risegnara
632
+ risognefa
633
+ risposta
634
+ ristogefa
635
+ risugne
636
+ risugnela
637
+ ro
638
+ rofe
639
+ ros
640
+ rossetto
641
+ rove
642
+ rovello
643
+ ru
644
+ rufebu
645
+ rufelu
646
+ sa
647
+ salvelone
648
+ sarmelone
649
+ sarvellone
650
+ sarvelone
651
+ save
652
+ scarpa
653
+ seca
654
+ secca
655
+ seglirate
656
+ sforta
657
+ sfrilo
658
+ sglofeno
659
+ si
660
+ signora
661
+ sinistra
662
+ sinistre
663
+ sinora
664
+ sirfelone
665
+ sirvellone
666
+ sirvelone
667
+ sisagnera
668
+ sisvelone
669
+ so
670
+ soce
671
+ soggiorno
672
+ sogno
673
+ soigiorno
674
+ sollevamento
675
+ sonviglio
676
+ sonviio
677
+ sottori
678
+ sottra
679
+ sottraghe
680
+ sponto
681
+ spor
682
+ ss
683
+ st
684
+ stage
685
+ star
686
+ ste
687
+ sterada
688
+ sto
689
+ str
690
+ stra
691
+ strada
692
+ strage
693
+ strano
694
+ strato
695
+ stre
696
+ strega
697
+ strillo
698
+ strilo
699
+ strin
700
+ stringa
701
+ strizza
702
+ stro
703
+ strof
704
+ strofa
705
+ strova
706
+ sve
707
+ sveglia
708
+ t
709
+ taglia
710
+ tanfo
711
+ tele
712
+ telefono
713
+ ten
714
+ tenantacolo
715
+ tenta
716
+ tentaco
717
+ tentacoli
718
+ tentacolo
719
+ testa
720
+ ti
721
+ tigl
722
+ tiglio
723
+ tiio
724
+ tizio
725
+ trano
726
+ tucca
727
+ tul
728
+ turlo
729
+ ustra
730
+ ustro
731
+ v
732
+ valo
733
+ vaso
734
+ ve
735
+ veglia
736
+ ventaglio
737
+ ventaio
738
+ versaglio
739
+ vetaglio
740
+ vi
741
+ vicino
742
+ vilo
743
+ vizio
744
+ voce
745
+ voglia
746
+ volo
747
+ vu
748
+ vube
749
+ vufebe
750
+ vufebo
751
+ vufebu
752
+ vufedu
753
+ vufpebo
754
+ vufube
755
+ vure
756
+ vusciacope
757
+ vuso
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7281f836a6c24a0cb01dcd6abcaccc1d05af48807a5138b3f39c235904e0d7f
3
+ size 1262112241
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": true,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": true,
30
+ "normalized": false,
31
+ "rstrip": true,
32
+ "single_word": false,
33
+ "special": false
34
+ }
35
+ },
36
+ "bos_token": "<s>",
37
+ "clean_up_tokenization_spaces": true,
38
+ "do_lower_case": false,
39
+ "eos_token": "</s>",
40
+ "model_max_length": 1000000000000000019884624838656,
41
+ "pad_token": "<pad>",
42
+ "processor_class": "Wav2Vec2ProcessorWithLM",
43
+ "replace_word_delimiter_char": " ",
44
+ "target_lang": null,
45
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
+ "unk_token": "<unk>",
47
+ "word_delimiter_token": "|"
48
+ }
transcriptions_cv7_test.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e7ccd1835ec95183cc10c41dddf4532f3700c79d1d9979197242a697c24ce54
3
+ size 78764122
transcriptions_cv7_validation.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13ec28e9aa51878172c99ac03e1d68611f1a37bc6acd12fa96b22a2f474e1a3c
3
+ size 78184898
vocab.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "'": 5,
3
+ "-": 6,
4
+ "</s>": 2,
5
+ "<pad>": 0,
6
+ "<s>": 1,
7
+ "<unk>": 3,
8
+ "a": 7,
9
+ "b": 8,
10
+ "c": 9,
11
+ "d": 10,
12
+ "e": 11,
13
+ "f": 12,
14
+ "g": 13,
15
+ "h": 14,
16
+ "i": 15,
17
+ "j": 16,
18
+ "k": 17,
19
+ "l": 18,
20
+ "m": 19,
21
+ "n": 20,
22
+ "o": 21,
23
+ "p": 22,
24
+ "q": 23,
25
+ "r": 24,
26
+ "s": 25,
27
+ "t": 26,
28
+ "u": 27,
29
+ "v": 28,
30
+ "w": 29,
31
+ "x": 30,
32
+ "y": 31,
33
+ "z": 32,
34
+ "|": 4,
35
+ "à": 33,
36
+ "á": 34,
37
+ "è": 35,
38
+ "é": 36,
39
+ "ì": 37,
40
+ "í": 38,
41
+ "ò": 39,
42
+ "ó": 40,
43
+ "ù": 41,
44
+ "ú": 42,
45
+ "č": 43,
46
+ "ō": 44,
47
+ "š": 45
48
+ }