jonatasgrosman committed on
Commit
ab1e0f4
1 Parent(s): d12252b

update README

Browse files
Files changed (1) hide show
  1. README.md +13 -7
README.md CHANGED
@@ -27,7 +27,7 @@ model-index:
27
  value: 13.60
28
  - name: Test CER
29
  type: cer
30
- value: 8.12
31
  ---
32
 
33
  # Wav2Vec2-Large-XLSR-53-Dutch
@@ -101,12 +101,16 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
101
  LANG_ID = "nl"
102
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-dutch"
103
  DEVICE = "cuda"
 
104
 
105
  CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
106
  "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
107
  "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
108
 
109
  test_dataset = load_dataset("common_voice", LANG_ID, split="test")
 
 
 
110
  wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
111
  cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
112
 
@@ -119,9 +123,11 @@ model.to(DEVICE)
119
  # Preprocessing the datasets.
120
  # We need to read the audio files as arrays
121
  def speech_file_to_array_fn(batch):
122
- batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).upper()
123
- speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
 
124
  batch["speech"] = speech_array
 
125
  return batch
126
 
127
  test_dataset = test_dataset.map(speech_file_to_array_fn)
@@ -138,13 +144,13 @@ def evaluate(batch):
138
  batch["pred_strings"] = processor.batch_decode(pred_ids)
139
  return batch
140
 
141
- result = test_dataset.map(evaluate, batched=True, batch_size=32)
142
 
143
- print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=8000)))
144
- print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=8000)))
145
  ```
146
 
147
  **Test Result**:
148
 
149
  - WER: 13.60%
150
- - CER: 8.12%
 
27
  value: 13.60
28
  - name: Test CER
29
  type: cer
30
+ value: 4.45
31
  ---
32
 
33
  # Wav2Vec2-Large-XLSR-53-Dutch
 
101
  LANG_ID = "nl"
102
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-dutch"
103
  DEVICE = "cuda"
104
+ MAX_SAMPLES = 8000
105
 
106
  CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
107
  "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
108
  "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
109
 
110
  test_dataset = load_dataset("common_voice", LANG_ID, split="test")
111
+ if len(test_dataset) > MAX_SAMPLES:
112
+ test_dataset = test_dataset.select(range(MAX_SAMPLES))
113
+
114
  wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
115
  cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
116
 
 
123
  # Preprocessing the datasets.
124
  # We need to read the audio files as arrays
125
  def speech_file_to_array_fn(batch):
126
+ with warnings.catch_warnings():
127
+ warnings.simplefilter("ignore")
128
+ speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
129
  batch["speech"] = speech_array
130
+ batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).upper()
131
  return batch
132
 
133
  test_dataset = test_dataset.map(speech_file_to_array_fn)
 
144
  batch["pred_strings"] = processor.batch_decode(pred_ids)
145
  return batch
146
 
147
+ result = test_dataset.map(evaluate, batched=True, batch_size=8)
148
 
149
+ print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=1000)))
150
+ print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=1000)))
151
  ```
152
 
153
  **Test Result**:
154
 
155
  - WER: 13.60%
156
+ - CER: 4.45%