Dandan0K committed on
Commit
7101aac
1 Parent(s): e67ba53

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,27 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
11
  *.model filter=lfs diff=lfs merge=lfs -text
12
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
13
  *.onnx filter=lfs diff=lfs merge=lfs -text
14
  *.ot filter=lfs diff=lfs merge=lfs -text
15
  *.parquet filter=lfs diff=lfs merge=lfs -text
16
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 
21
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
22
  *.tflite filter=lfs diff=lfs merge=lfs -text
23
  *.tgz filter=lfs diff=lfs merge=lfs -text
 
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - it
4
+ license: apache-2.0
5
+ tags:
6
+ - automatic-speech-recognition
7
+ - hf-asr-leaderboard
8
+ - it
9
+ - mozilla-foundation/common_voice_8_0
10
+ - robust-speech-event
11
+ datasets:
12
+ - mozilla-foundation/common_voice_8_0
13
+ model-index:
14
+ - name: XLS-R Wav2Vec2 Italian by Jonatas Grosman
15
+ results:
16
+ - task:
17
+ name: Automatic Speech Recognition
18
+ type: automatic-speech-recognition
19
+ dataset:
20
+ name: Common Voice 8
21
+ type: mozilla-foundation/common_voice_8_0
22
+ args: it
23
+ metrics:
24
+ - name: Test WER
25
+ type: wer
26
+ value: 9.04
27
+ - name: Test CER
28
+ type: cer
29
+ value: 2.2
30
+ - name: Test WER (+LM)
31
+ type: wer
32
+ value: 6.75
33
+ - name: Test CER (+LM)
34
+ type: cer
35
+ value: 1.76
36
+ - task:
37
+ name: Automatic Speech Recognition
38
+ type: automatic-speech-recognition
39
+ dataset:
40
+ name: Robust Speech Event - Dev Data
41
+ type: speech-recognition-community-v2/dev_data
42
+ args: it
43
+ metrics:
44
+ - name: Dev WER
45
+ type: wer
46
+ value: 23.38
47
+ - name: Dev CER
48
+ type: cer
49
+ value: 9.41
50
+ - name: Dev WER (+LM)
51
+ type: wer
52
+ value: 15.84
53
+ - name: Dev CER (+LM)
54
+ type: cer
55
+ value: 8.93
56
+ - task:
57
+ name: Automatic Speech Recognition
58
+ type: automatic-speech-recognition
59
+ dataset:
60
+ name: Robust Speech Event - Test Data
61
+ type: speech-recognition-community-v2/eval_data
62
+ args: it
63
+ metrics:
64
+ - name: Test WER
65
+ type: wer
66
+ value: 18.34
67
+ ---
68
+
69
+ # Fine-tuned XLS-R 1B model for speech recognition in Italian
70
+
71
+ Fine-tuned [facebook/wav2vec2-xls-r-1b](https://huggingface.co/facebook/wav2vec2-xls-r-1b) on Italian using the train and validation splits of [Common Voice 8.0](https://huggingface.co/datasets/mozilla-foundation/common_voice_8_0), [Multilingual TEDx](http://www.openslr.org/100), [Multilingual LibriSpeech](https://www.openslr.org/94/), and [Voxpopuli](https://github.com/facebookresearch/voxpopuli).
72
+ When using this model, make sure that your speech input is sampled at 16kHz.
73
+
74
+ This model was fine-tuned with the [HuggingSound](https://github.com/jonatasgrosman/huggingsound) tool, thanks to the GPU credits generously provided by [OVHcloud](https://www.ovhcloud.com/en/public-cloud/ai-training/) :)
75
+
76
+ ## Usage
77
+
78
+ Using the [HuggingSound](https://github.com/jonatasgrosman/huggingsound) library:
79
+
80
+ ```python
81
+ from huggingsound import SpeechRecognitionModel
82
+
83
+ model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-xls-r-1b-italian")
84
+ audio_paths = ["/path/to/file.mp3", "/path/to/another_file.wav"]
85
+
86
+ transcriptions = model.transcribe(audio_paths)
87
+ ```
88
+
89
+ Writing your own inference script:
90
+
91
+ ```python
92
+ import torch
93
+ import librosa
94
+ from datasets import load_dataset
95
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
96
+
97
+ LANG_ID = "it"
98
+ MODEL_ID = "jonatasgrosman/wav2vec2-xls-r-1b-italian"
99
+ SAMPLES = 10
100
+
101
+ test_dataset = load_dataset("common_voice", LANG_ID, split=f"test[:{SAMPLES}]")
102
+
103
+ processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
104
+ model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
105
+
106
+ # Preprocessing the datasets.
107
+ # We need to read the audio files as arrays
108
+ def speech_file_to_array_fn(batch):
109
+ speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
110
+ batch["speech"] = speech_array
111
+ batch["sentence"] = batch["sentence"].upper()
112
+ return batch
113
+
114
+ test_dataset = test_dataset.map(speech_file_to_array_fn)
115
+ inputs = processor(test_dataset["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
116
+
117
+ with torch.no_grad():
118
+ logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
119
+
120
+ predicted_ids = torch.argmax(logits, dim=-1)
121
+ predicted_sentences = processor.batch_decode(predicted_ids)
122
+ ```
123
+
124
+ ## Evaluation Commands
125
+
126
+ 1. To evaluate on `mozilla-foundation/common_voice_8_0` with split `test`
127
+
128
+ ```bash
129
+ python eval.py --model_id jonatasgrosman/wav2vec2-xls-r-1b-italian --dataset mozilla-foundation/common_voice_8_0 --config it --split test
130
+ ```
131
+
132
+ 2. To evaluate on `speech-recognition-community-v2/dev_data`
133
+
134
+ ```bash
135
+ python eval.py --model_id jonatasgrosman/wav2vec2-xls-r-1b-italian --dataset speech-recognition-community-v2/dev_data --config it --split validation --chunk_length_s 5.0 --stride_length_s 1.0
136
+ ```
137
+
138
+ ## Citation
139
+ If you want to cite this model you can use this:
140
+
141
+ ```bibtex
142
+ @misc{grosman2021xlsr-1b-italian,
143
+ title={Fine-tuned {XLS-R} 1{B} model for speech recognition in {I}talian},
144
+ author={Grosman, Jonatas},
145
+ howpublished={\url{https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-italian}},
146
+ year={2022}
147
+ }
148
+ ```
alphabet.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"labels": ["", "<s>", "</s>", "\u2047", " ", "'", "-", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "\u00e0", "\u00e1", "\u00e8", "\u00e9", "\u00ec", "\u00ed", "\u00f2", "\u00f3", "\u00f9", "\u00fa"], "is_bpe": false}
config.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-xls-r-1b",
3
+ "activation_dropout": 0.05,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.05,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 1024,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.05,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.05,
55
+ "hidden_act": "gelu",
56
+ "hidden_dropout": 0.05,
57
+ "hidden_size": 1280,
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 5120,
60
+ "layer_norm_eps": 1e-05,
61
+ "layerdrop": 0.05,
62
+ "mask_feature_length": 10,
63
+ "mask_feature_min_masks": 0,
64
+ "mask_feature_prob": 0.0,
65
+ "mask_time_length": 10,
66
+ "mask_time_min_masks": 2,
67
+ "mask_time_prob": 0.05,
68
+ "model_type": "wav2vec2",
69
+ "num_adapter_layers": 3,
70
+ "num_attention_heads": 16,
71
+ "num_codevector_groups": 2,
72
+ "num_codevectors_per_group": 320,
73
+ "num_conv_pos_embedding_groups": 16,
74
+ "num_conv_pos_embeddings": 128,
75
+ "num_feat_extract_layers": 7,
76
+ "num_hidden_layers": 48,
77
+ "num_negatives": 100,
78
+ "output_hidden_size": 1280,
79
+ "pad_token_id": 0,
80
+ "proj_codevector_dim": 1024,
81
+ "tdnn_dilation": [
82
+ 1,
83
+ 2,
84
+ 3,
85
+ 1,
86
+ 1
87
+ ],
88
+ "tdnn_dim": [
89
+ 512,
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 1500
94
+ ],
95
+ "tdnn_kernel": [
96
+ 5,
97
+ 3,
98
+ 3,
99
+ 1,
100
+ 1
101
+ ],
102
+ "torch_dtype": "float32",
103
+ "transformers_version": "4.16.0.dev0",
104
+ "use_weighted_layer_sum": false,
105
+ "vocab_size": 43,
106
+ "xvector_output_dim": 512
107
+ }
eval.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from datasets import load_dataset, load_metric, Audio, Dataset
3
+ from transformers import pipeline, AutoFeatureExtractor, AutoTokenizer, AutoConfig, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
4
+ import re
5
+ import torch
6
+ import argparse
7
+ from typing import Dict
8
+
9
def log_results(result: Dataset, args: argparse.Namespace):
    """DO NOT CHANGE the metric computation. Computes and logs the result metrics.

    Args:
        result: dataset whose "target" and "prediction" columns hold the
            reference and predicted transcriptions.
        args: parsed command-line arguments; this function reads
            ``args.dataset``, ``args.config``, ``args.split`` and
            ``args.log_outputs``.

    Side effects:
        Prints the WER/CER summary, writes it to
        ``<dataset_id>_eval_results.txt`` and, when ``args.log_outputs`` is
        set, writes every prediction/target pair to
        ``log_<dataset_id>_predictions.txt`` / ``log_<dataset_id>_targets.txt``.
    """

    log_outputs = args.log_outputs
    # e.g. "mozilla-foundation/common_voice_8_0" + "it" + "test"
    #   -> "mozilla-foundation_common_voice_8_0_it_test"
    dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split])

    # load metric
    wer = load_metric("wer")
    cer = load_metric("cer")

    # compute metrics
    wer_result = wer.compute(references=result["target"], predictions=result["prediction"])
    cer_result = cer.compute(references=result["target"], predictions=result["prediction"])

    # print & log results
    result_str = (
        f"WER: {wer_result}\n"
        f"CER: {cer_result}"
    )
    print(result_str)

    with open(f"{dataset_id}_eval_results.txt", "w", encoding="utf-8") as f:
        f.write(result_str)

    # log all results in text file. Possibly interesting for analysis.
    # `--log_outputs` is a store_true flag, so it is False/True and never None:
    # test its truthiness, otherwise the logs are written unconditionally.
    if log_outputs:
        pred_file = f"log_{dataset_id}_predictions.txt"
        target_file = f"log_{dataset_id}_targets.txt"

        # Explicit UTF-8: transcriptions contain accented characters and the
        # platform default encoding is not guaranteed to represent them.
        with open(pred_file, "w", encoding="utf-8") as p, open(target_file, "w", encoding="utf-8") as t:

            # mapping function to write output: one index line followed by
            # one text line per example, in both files
            def write_to_file(batch, i):
                p.write(f"{i}" + "\n")
                p.write(batch["prediction"] + "\n")
                t.write(f"{i}" + "\n")
                t.write(batch["target"] + "\n")

            result.map(write_to_file, with_indices=True)
48
+
49
+
50
def normalize_text(text: str, invalid_chars_regex: str, to_lower: bool) -> str:
    """DO ADAPT FOR YOUR USE CASE. Normalizes the target text.

    Args:
        text: raw reference transcription.
        invalid_chars_regex: regular expression matching every character that
            must be dropped; each match is replaced by a single space.
        to_lower: when True the text is lower-cased, otherwise upper-cased.

    Returns:
        The case-folded text with invalid characters removed, runs of
        whitespace collapsed to one space, and leading/trailing whitespace
        stripped.
    """

    text = text.lower() if to_lower else text.upper()

    text = re.sub(invalid_chars_regex, " ", text)

    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    text = re.sub(r"\s+", " ", text).strip()

    return text
60
+
61
+
62
def main(args):
    """Transcribe a dataset split with the given model and log WER/CER.

    Reads from ``args``: ``dataset``, ``config``, ``split``, ``model_id``,
    ``greedy``, ``device``, ``chunk_length_s`` and ``stride_length_s``.
    ``args.device`` is overwritten in place when it is None.
    """
    # load dataset
    dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)

    # for testing: only process the first ten examples as a test
    # dataset = dataset.select(range(10))

    # load processor: with --greedy no language-model decoder is handed to the
    # pipeline; otherwise the decoder bundled with the LM processor is used
    if args.greedy:
        processor = Wav2Vec2Processor.from_pretrained(args.model_id)
        decoder = None
    else:
        processor = Wav2Vec2ProcessorWithLM.from_pretrained(args.model_id)
        decoder = processor.decoder

    feature_extractor = processor.feature_extractor
    tokenizer = processor.tokenizer

    # resample audio to the sampling rate the feature extractor expects
    dataset = dataset.cast_column("audio", Audio(sampling_rate=feature_extractor.sampling_rate))

    # load eval pipeline; with no explicit device use GPU 0 when CUDA is
    # available and CPU (-1) otherwise
    if args.device is None:
        args.device = 0 if torch.cuda.is_available() else -1

    config = AutoConfig.from_pretrained(args.model_id)
    model = AutoModelForCTC.from_pretrained(args.model_id)

    # asr = pipeline("automatic-speech-recognition", model=args.model_id, device=args.device)
    asr = pipeline("automatic-speech-recognition", config=config, model=model, tokenizer=tokenizer,
                   feature_extractor=feature_extractor, decoder=decoder, device=args.device)

    # build normalizer config: every character that is not whitespace and not
    # a non-special vocabulary token is treated as invalid in the references
    tokenizer = AutoTokenizer.from_pretrained(args.model_id)
    tokens = list(tokenizer.convert_ids_to_tokens(range(0, tokenizer.vocab_size)))
    special_tokens = [
        tokenizer.pad_token, tokenizer.word_delimiter_token,
        tokenizer.unk_token, tokenizer.bos_token,
        tokenizer.eos_token,
    ]
    non_special_tokens = [x for x in tokens if x not in special_tokens]
    # Raw f-string: "\s" in a plain literal is an invalid escape sequence.
    invalid_chars_regex = rf"[^\s{re.escape(''.join(set(non_special_tokens)))}]"
    # References are lower-cased when the vocabulary contains any lower-case
    # letter, upper-cased otherwise.
    normalize_to_lower = any(
        token.isalpha() and token.islower() for token in non_special_tokens
    )

    # map function to decode audio
    def map_to_pred(batch, args=args, asr=asr, invalid_chars_regex=invalid_chars_regex, normalize_to_lower=normalize_to_lower):
        prediction = asr(batch["audio"]["array"], chunk_length_s=args.chunk_length_s, stride_length_s=args.stride_length_s)

        batch["prediction"] = prediction["text"]
        batch["target"] = normalize_text(batch["sentence"], invalid_chars_regex, normalize_to_lower)
        return batch

    # run inference on all examples
    result = dataset.map(map_to_pred, remove_columns=dataset.column_names)

    # filtering out empty targets
    result = result.filter(lambda example: example["target"] != "")

    # compute and log_results
    # do not change function below
    log_results(result, args)
127
+
128
+
129
# Command-line entry point: parse the evaluation options and run main().
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_id", type=str, required=True, help="Model identifier. Should be loadable with 🤗 Transformers"
    )
    parser.add_argument(
        "--dataset", type=str, required=True, help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets"
    )
    parser.add_argument(
        "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
    )
    parser.add_argument(
        "--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`"
    )
    parser.add_argument(
        "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Defaults to None. For long audio files a good value would be 5.0 seconds."
    )
    parser.add_argument(
        "--stride_length_s", type=float, default=None, help="Stride of the audio chunks. Defaults to None. For long audio files a good value would be 1.0 seconds."
    )
    parser.add_argument(
        "--log_outputs", action='store_true', help="If defined, write outputs to log file for analysis."
    )
    parser.add_argument(
        "--greedy", action='store_true', help="If defined, the LM will be ignored during inference."
    )
    # The default is None (not -1): main() then picks GPU 0 when CUDA is
    # available and falls back to CPU otherwise, so the help text says so.
    parser.add_argument(
        "--device",
        type=int,
        default=None,
        help="The device to run the pipeline on. -1 for CPU, 0 for the first GPU and so on. "
             "Defaults to the first GPU when CUDA is available, otherwise CPU.",
    )
    args = parser.parse_args()

    main(args)
full_eval.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Runs every evaluation of the model card: each dataset is evaluated once with
# greedy decoding (outputs renamed with a `_greedy` suffix) and once with the
# language model (outputs keep the default names written by eval.py).

# Stop at the first failing command so a failed eval.py run is never followed
# by an `mv` that relabels stale result files from an earlier run.
set -e

# CV 8 - TEST

python eval.py --model_id jonatasgrosman/wav2vec2-xls-r-1b-italian --dataset mozilla-foundation/common_voice_8_0 --config it --split test --log_outputs --greedy
mv log_mozilla-foundation_common_voice_8_0_it_test_predictions.txt log_mozilla-foundation_common_voice_8_0_it_test_predictions_greedy.txt
mv mozilla-foundation_common_voice_8_0_it_test_eval_results.txt mozilla-foundation_common_voice_8_0_it_test_eval_results_greedy.txt

python eval.py --model_id jonatasgrosman/wav2vec2-xls-r-1b-italian --dataset mozilla-foundation/common_voice_8_0 --config it --split test --log_outputs

# HF EVENT - DEV

python eval.py --model_id jonatasgrosman/wav2vec2-xls-r-1b-italian --dataset speech-recognition-community-v2/dev_data --config it --split validation --chunk_length_s 5.0 --stride_length_s 1.0 --log_outputs --greedy
mv log_speech-recognition-community-v2_dev_data_it_validation_predictions.txt log_speech-recognition-community-v2_dev_data_it_validation_predictions_greedy.txt
mv speech-recognition-community-v2_dev_data_it_validation_eval_results.txt speech-recognition-community-v2_dev_data_it_validation_eval_results_greedy.txt

python eval.py --model_id jonatasgrosman/wav2vec2-xls-r-1b-italian --dataset speech-recognition-community-v2/dev_data --config it --split validation --chunk_length_s 5.0 --stride_length_s 1.0 --log_outputs
language_model/2gram_It_Hum_no_df1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50c1560a01c4ff13ab253ffc485be66df9c80621e20a7aea52a0377a3804c8b1
3
+ size 51090
language_model/attrs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
language_model/unigrams.txt ADDED
@@ -0,0 +1,757 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ </s>
2
+ <s>
3
+ a
4
+ ablalisto
5
+ ablelisto
6
+ ablimione
7
+ acco
8
+ aec
9
+ aestrada
10
+ agnamisa
11
+ al
12
+ alba
13
+ alc
14
+ alce
15
+ alceli
16
+ alcelis
17
+ alcelisisisotodoto
18
+ alcelista
19
+ alcelisto
20
+ alcelsivi
21
+ alcesti
22
+ alcestio
23
+ alcestito
24
+ alcezio
25
+ alchelisto
26
+ alecisto
27
+ alga
28
+ ananca
29
+ anca
30
+ anci
31
+ angelo
32
+ angola
33
+ angolo
34
+ anno
35
+ annocole
36
+ appartamento
37
+ appertamento
38
+ apportamento
39
+ appu
40
+ appunta
41
+ appuntamento
42
+ aprile
43
+ ar
44
+ arlo
45
+ arni
46
+ arrosamento
47
+ arrossa
48
+ arrossamento
49
+ arvi
50
+ aspedale
51
+ astate
52
+ astro
53
+ attesa
54
+ avviso
55
+ b
56
+ ba
57
+ babbile
58
+ bacio
59
+ badile
60
+ bale
61
+ bales
62
+ balestra
63
+ bambina
64
+ bambino
65
+ ban
66
+ bandito
67
+ banse
68
+ banzione
69
+ barc
70
+ baril
71
+ barile
72
+ basilico
73
+ bava
74
+ bavo
75
+ be
76
+ begnole
77
+ bepe
78
+ ber
79
+ bersa
80
+ bersaglio
81
+ bestra
82
+ bi
83
+ bia
84
+ bian
85
+ bianca
86
+ bicicletta
87
+ bigliaso
88
+ bignalo
89
+ bignas
90
+ bignaso
91
+ bignolia
92
+ bin
93
+ binboggio
94
+ binca
95
+ bincolo
96
+ binoccolo
97
+ binocolo
98
+ bis
99
+ bismacco
100
+ bismaco
101
+ bismag
102
+ bismaggo
103
+ bismago
104
+ bismo
105
+ bismoggo
106
+ bitto
107
+ bivio
108
+ bo
109
+ bodifi
110
+ bodifico
111
+ bole
112
+ boleggio
113
+ bolifi
114
+ bolifico
115
+ boni
116
+ bonifico
117
+ bor
118
+ borte
119
+ bu
120
+ bufebbu
121
+ bufebe
122
+ bufebu
123
+ buna
124
+ buongiorno
125
+ c
126
+ ca
127
+ cafelat
128
+ caff?
129
+ caffattiera
130
+ caffel
131
+ caffelatte
132
+ caffetteria
133
+ caffettiera
134
+ caglia
135
+ cagliavaro
136
+ cagliralo
137
+ caglirano
138
+ caglirilo
139
+ caglivaro
140
+ calderone
141
+ caliralo
142
+ callivaro
143
+ camer
144
+ camera
145
+ cammera
146
+ campagna
147
+ camposquadra
148
+ can
149
+ candidato
150
+ candito
151
+ canenfrosto
152
+ canentrosto
153
+ canf
154
+ canfo
155
+ canfosto
156
+ canfostro
157
+ canfro
158
+ canfronsto
159
+ canfrosto
160
+ canfrostro
161
+ caniele
162
+ canile
163
+ canindato
164
+ cannefrosto
165
+ cantapesta
166
+ cantello
167
+ capoadra
168
+ capocla
169
+ capoclass
170
+ capoclasse
171
+ capoquardra
172
+ caposcuola
173
+ caposquadra
174
+ caposquodra
175
+ cappello
176
+ capuscola
177
+ car
178
+ care
179
+ caregresto
180
+ cariereta
181
+ carletino
182
+ carli
183
+ carnevale
184
+ carnivoro
185
+ cart
186
+ cartapesta
187
+ cartegresto
188
+ cartellino
189
+ cartello
190
+ cartellone
191
+ cartepes
192
+ cartoncino
193
+ carvelale
194
+ cas
195
+ casa
196
+ casatello
197
+ caso
198
+ cassaforte
199
+ casse
200
+ cassetto
201
+ castello
202
+ cava
203
+ ce
204
+ cedicare
205
+ cegli
206
+ cegliar
207
+ cegliarate
208
+ cegliarte
209
+ cegligrate
210
+ ceglira
211
+ ceglirate
212
+ ceglireta
213
+ celgliarate
214
+ celia
215
+ cellirate
216
+ cellire
217
+ cen
218
+ chedi
219
+ chedimare
220
+ chedinare
221
+ chegliare
222
+ cheglirate
223
+ cher
224
+ chetinere
225
+ chettinere
226
+ chevin
227
+ chia
228
+ chinadire
229
+ chindiare
230
+ ciglilate
231
+ cioco
232
+ co
233
+ codi
234
+ codice
235
+ cofa
236
+ cofano
237
+ cofe
238
+ coffetteria
239
+ coglieralo
240
+ col
241
+ colderone
242
+ colto
243
+ comenta
244
+ cometa
245
+ compagna
246
+ compagno
247
+ compelo
248
+ con
249
+ condidato
250
+ condito
251
+ confettiera
252
+ confi
253
+ confrosto
254
+ contanpesta
255
+ coppe
256
+ cor
257
+ cora
258
+ corageso
259
+ coragnesto
260
+ coragresto
261
+ core
262
+ coregesta
263
+ coregnesta
264
+ coregresta
265
+ coregresto
266
+ coritta
267
+ cornevale
268
+ corrita
269
+ cortapesta
270
+ corvegresta
271
+ costro
272
+ cotro
273
+ cuore
274
+ custro
275
+ da
276
+ dabe
277
+ dabile
278
+ dadile
279
+ dales
280
+ dandi
281
+ danzione
282
+ dape
283
+ das
284
+ dasilico
285
+ dava
286
+ dazione
287
+ de
288
+ degnole
289
+ depe
290
+ dersagl
291
+ destr
292
+ destra
293
+ dete
294
+ dette
295
+ di
296
+ dici
297
+ dicicletta
298
+ diga
299
+ dignaso
300
+ dilo
301
+ dis
302
+ dismaggo
303
+ dismoggo
304
+ dismogo
305
+ diva
306
+ divio
307
+ do
308
+ domenica
309
+ donifico
310
+ dor
311
+ dorca
312
+ dorizza
313
+ du
314
+ duna
315
+ e
316
+ ec
317
+ ecco
318
+ egnamisa
319
+ egnomisa
320
+ egnomista
321
+ elce
322
+ esetate
323
+ espedale
324
+ espegale
325
+ estate
326
+ etteza
327
+ f
328
+ fa
329
+ fafa
330
+ faga
331
+ fal
332
+ fallo
333
+ fame
334
+ fantasma
335
+ far
336
+ fard
337
+ farde
338
+ farfalla
339
+ farfalle
340
+ farmacio
341
+ fatto
342
+ fav
343
+ fava
344
+ felmaglio
345
+ feressa
346
+ fermaglio
347
+ fevubo
348
+ ff
349
+ fff
350
+ fi
351
+ figl
352
+ figlio
353
+ fine
354
+ finestra
355
+ finistra
356
+ fl
357
+ flavestro
358
+ flenastro
359
+ flene
360
+ flenestro
361
+ flenetrego
362
+ flenstro
363
+ flu
364
+ flunestro
365
+ flustro
366
+ fo
367
+ foca
368
+ foce
369
+ foga
370
+ foglia
371
+ foglio
372
+ fonagio
373
+ foneggio
374
+ fore
375
+ forfalle
376
+ fr
377
+ frate
378
+ fratello
379
+ fresemma
380
+ fri
381
+ friermace
382
+ frima
383
+ frimace
384
+ frimache
385
+ frimasce
386
+ frimece
387
+ frimoce
388
+ frostro
389
+ fru
390
+ frutta
391
+ fu
392
+ fube
393
+ fugiaco
394
+ fupebo
395
+ fuvebu
396
+ ga
397
+ gadliralo
398
+ gagliralo
399
+ gairalo
400
+ gal
401
+ galgi
402
+ galgi?
403
+ galialo
404
+ gatto
405
+ ge
406
+ genitore
407
+ gessetto
408
+ gi
409
+ gi?
410
+ gin
411
+ gio
412
+ gioco
413
+ gior
414
+ giossetto
415
+ giostra
416
+ girondolo
417
+ giroton
418
+ girotondo
419
+ girotonondo
420
+ giustra
421
+ glio
422
+ glioveglio
423
+ gliovelo
424
+ glirate
425
+ glofano
426
+ glofeno
427
+ glove
428
+ glovello
429
+ glovelo
430
+ gloveno
431
+ gloverno
432
+ gnalo
433
+ go
434
+ gresto
435
+ grimace
436
+ ignemisa
437
+ ilnegisa
438
+ inlesa
439
+ inquinamento
440
+ insegnamento
441
+ insegnamentorofe
442
+ insegne
443
+ iovelo
444
+ isegnmisa
445
+ istro
446
+ isvelone
447
+ la
448
+ laba
449
+ lad
450
+ lada
451
+ lana
452
+ lars
453
+ larse
454
+ lastra
455
+ lavoro
456
+ le
457
+ les
458
+ lesciacode
459
+ lesciacope
460
+ linea
461
+ listo
462
+ lo
463
+ loanovarro
464
+ loba
465
+ lonacio
466
+ lonaggio
467
+ lonagio
468
+ longio
469
+ lorse
470
+ lu
471
+ lurto
472
+ m
473
+ ma
474
+ macchidante
475
+ machi
476
+ machidante
477
+ maestra
478
+ mano
479
+ mar
480
+ marchi
481
+ marg
482
+ marghe
483
+ margherita
484
+ marghrerita
485
+ mas
486
+ mascere
487
+ maschera
488
+ mascheri
489
+ mase
490
+ matta
491
+ matto
492
+ me
493
+ medaglia
494
+ medaglietta
495
+ medegliatta
496
+ mela
497
+ meno
498
+ meschera
499
+ mese
500
+ mezza
501
+ mezzogior
502
+ mezzogiorno
503
+ mi
504
+ mina
505
+ mine
506
+ minestra
507
+ mini
508
+ ministra
509
+ minitra
510
+ mis
511
+ mo
512
+ mondo
513
+ monondo
514
+ montagna
515
+ mostra
516
+ motocicletta
517
+ munviglio
518
+ munviio
519
+ muviglio
520
+ na
521
+ nachipante
522
+ naso
523
+ natura
524
+ nisciacope
525
+ nisciocope
526
+ no
527
+ nocipante
528
+ nonna
529
+ nonno
530
+ norci
531
+ notte
532
+ nu
533
+ nudo
534
+ nurto
535
+ nutro
536
+ nutto
537
+ nuvola
538
+ o
539
+ obblimione
540
+ obli
541
+ oblibione
542
+ oblie
543
+ oblimi
544
+ oblimione
545
+ oblimo
546
+ offesa
547
+ oggi
548
+ ognamise
549
+ oignomisa
550
+ olblimione
551
+ oli
552
+ olimione
553
+ olimo
554
+ oppinione
555
+ oppuntame
556
+ orlo
557
+ orsamento
558
+ ospedale
559
+ ostro
560
+ p
561
+ pa
562
+ pace
563
+ pachetenta
564
+ pales
565
+ palestra
566
+ pane
567
+ par
568
+ parola
569
+ pasce
570
+ patto
571
+ pe
572
+ pelipo
573
+ pen
574
+ pentito
575
+ per
576
+ pers
577
+ persemma
578
+ perso
579
+ persona
580
+ personale
581
+ perzzemolo
582
+ pesc
583
+ pesce
584
+ pescespada
585
+ pesche
586
+ pez
587
+ piastra
588
+ pista
589
+ po
590
+ pochetaenta
591
+ pochetenta
592
+ poli
593
+ polino
594
+ polipo
595
+ pomo
596
+ pomodoro
597
+ por
598
+ porita
599
+ poritta
600
+ potto
601
+ pradeglia
602
+ pran
603
+ pranzo
604
+ pre
605
+ predeglia
606
+ premme
607
+ premmes
608
+ premmesa
609
+ presa
610
+ presamma
611
+ prese
612
+ presemma
613
+ preveva
614
+ prezze
615
+ prezzemolo
616
+ prezzo
617
+ probeglia
618
+ prodeglia
619
+ prodeia
620
+ prodiglia
621
+ pronto
622
+ puntamento
623
+ radio
624
+ ranviglio
625
+ ranvioglio
626
+ raviglio
627
+ ri
628
+ riposta
629
+ ris
630
+ risagnera
631
+ risegnara
632
+ risognefa
633
+ risposta
634
+ ristogefa
635
+ risugne
636
+ risugnela
637
+ ro
638
+ rofe
639
+ ros
640
+ rossetto
641
+ rove
642
+ rovello
643
+ ru
644
+ rufebu
645
+ rufelu
646
+ sa
647
+ salvelone
648
+ sarmelone
649
+ sarvellone
650
+ sarvelone
651
+ save
652
+ scarpa
653
+ seca
654
+ secca
655
+ seglirate
656
+ sforta
657
+ sfrilo
658
+ sglofeno
659
+ si
660
+ signora
661
+ sinistra
662
+ sinistre
663
+ sinora
664
+ sirfelone
665
+ sirvellone
666
+ sirvelone
667
+ sisagnera
668
+ sisvelone
669
+ so
670
+ soce
671
+ soggiorno
672
+ sogno
673
+ soigiorno
674
+ sollevamento
675
+ sonviglio
676
+ sonviio
677
+ sottori
678
+ sottra
679
+ sottraghe
680
+ sponto
681
+ spor
682
+ ss
683
+ st
684
+ stage
685
+ star
686
+ ste
687
+ sterada
688
+ sto
689
+ str
690
+ stra
691
+ strada
692
+ strage
693
+ strano
694
+ strato
695
+ stre
696
+ strega
697
+ strillo
698
+ strilo
699
+ strin
700
+ stringa
701
+ strizza
702
+ stro
703
+ strof
704
+ strofa
705
+ strova
706
+ sve
707
+ sveglia
708
+ t
709
+ taglia
710
+ tanfo
711
+ tele
712
+ telefono
713
+ ten
714
+ tenantacolo
715
+ tenta
716
+ tentaco
717
+ tentacoli
718
+ tentacolo
719
+ testa
720
+ ti
721
+ tigl
722
+ tiglio
723
+ tiio
724
+ tizio
725
+ trano
726
+ tucca
727
+ tul
728
+ turlo
729
+ ustra
730
+ ustro
731
+ v
732
+ valo
733
+ vaso
734
+ ve
735
+ veglia
736
+ ventaglio
737
+ ventaio
738
+ versaglio
739
+ vetaglio
740
+ vi
741
+ vicino
742
+ vilo
743
+ vizio
744
+ voce
745
+ voglia
746
+ volo
747
+ vu
748
+ vube
749
+ vufebe
750
+ vufebo
751
+ vufebu
752
+ vufedu
753
+ vufpebo
754
+ vufube
755
+ vure
756
+ vusciacope
757
+ vuso
log_mozilla-foundation_common_voice_8_0_it_test_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
log_mozilla-foundation_common_voice_8_0_it_test_predictions_greedy.txt ADDED
The diff for this file is too large to render. See raw diff
 
log_mozilla-foundation_common_voice_8_0_it_test_targets.txt ADDED
The diff for this file is too large to render. See raw diff
 
log_mozilla-foundation_common_voice_8_0_it_test_targets_greedy.txt ADDED
The diff for this file is too large to render. See raw diff
 
log_speech-recognition-community-v2_dev_data_it_validation_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
log_speech-recognition-community-v2_dev_data_it_validation_predictions_greedy.txt ADDED
The diff for this file is too large to render. See raw diff
 
log_speech-recognition-community-v2_dev_data_it_validation_targets.txt ADDED
The diff for this file is too large to render. See raw diff
 
mozilla-foundation_common_voice_8_0_it_test_eval_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.0675129084844723
2
+ CER: 0.01761353561922042
mozilla-foundation_common_voice_8_0_it_test_eval_results_greedy.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.09043783123819138
2
+ CER: 0.022088713826821264
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6330c0527196c61ba8df8fec82234a6e74600018d497018d072d33d4c6b315d2
3
+ size 3850533041
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
+ }
speech-recognition-community-v2_dev_data_it_validation_eval_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.15843144982511453
2
+ CER: 0.08932029194181024
speech-recognition-community-v2_dev_data_it_validation_eval_results_greedy.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.23380462091728657
2
+ CER: 0.09411978882213727
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": true,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": true,
30
+ "normalized": false,
31
+ "rstrip": true,
32
+ "single_word": false,
33
+ "special": false
34
+ }
35
+ },
36
+ "bos_token": "<s>",
37
+ "clean_up_tokenization_spaces": true,
38
+ "do_lower_case": false,
39
+ "eos_token": "</s>",
40
+ "model_max_length": 1000000000000000019884624838656,
41
+ "pad_token": "<pad>",
42
+ "processor_class": "Wav2Vec2ProcessorWithLM",
43
+ "replace_word_delimiter_char": " ",
44
+ "target_lang": null,
45
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
+ "unk_token": "<unk>",
47
+ "word_delimiter_token": "|"
48
+ }
vocab.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "'": 5,
3
+ "-": 6,
4
+ "</s>": 2,
5
+ "<pad>": 0,
6
+ "<s>": 1,
7
+ "<unk>": 3,
8
+ "a": 7,
9
+ "b": 8,
10
+ "c": 9,
11
+ "d": 10,
12
+ "e": 11,
13
+ "f": 12,
14
+ "g": 13,
15
+ "h": 14,
16
+ "i": 15,
17
+ "j": 16,
18
+ "k": 17,
19
+ "l": 18,
20
+ "m": 19,
21
+ "n": 20,
22
+ "o": 21,
23
+ "p": 22,
24
+ "q": 23,
25
+ "r": 24,
26
+ "s": 25,
27
+ "t": 26,
28
+ "u": 27,
29
+ "v": 28,
30
+ "w": 29,
31
+ "x": 30,
32
+ "y": 31,
33
+ "z": 32,
34
+ "|": 4,
35
+ "à": 33,
36
+ "á": 34,
37
+ "è": 35,
38
+ "é": 36,
39
+ "ì": 37,
40
+ "í": 38,
41
+ "ò": 39,
42
+ "ó": 40,
43
+ "ù": 41,
44
+ "ú": 42
45
+ }