Update README.md
Browse files
README.md
CHANGED
@@ -170,6 +170,7 @@ import torchaudio
|
|
170 |
from datasets import load_dataset, load_metric
|
171 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
172 |
import re
|
|
|
173 |
ENCODER = {
|
174 |
"ia ": "iê ",
|
175 |
"ìa ": "iề ",
|
@@ -272,15 +273,16 @@ ENCODER = {
|
|
272 |
"ưỡ": "\xa6",
|
273 |
"ượ": "\xa7",
|
274 |
}
|
275 |
-
|
276 |
def decode_string(x):
|
277 |
for k, v in list(reversed(list(ENCODER.items()))):
|
278 |
x = x.replace(v, k)
|
279 |
return x
|
|
|
280 |
test_dataset = load_dataset("common_voice", "vi", split="test")
|
281 |
wer = load_metric("wer")
|
282 |
-
processor = Wav2Vec2Processor.from_pretrained(
|
283 |
-
model = Wav2Vec2ForCTC.from_pretrained(
|
284 |
model.to("cuda")
|
285 |
|
286 |
chars_to_ignore_regex = '[\\\+\@\ǀ\,\?\.\!\-\;\:\"\“\%\‘\”\�]'
|
@@ -294,7 +296,7 @@ def speech_file_to_array_fn(batch):
|
|
294 |
batch["speech"] = resampler(speech_array).squeeze().numpy()
|
295 |
return batch
|
296 |
|
297 |
-
|
298 |
# Preprocessing the datasets.
|
299 |
# We need to read the audio files as arrays
|
300 |
def evaluate(batch):
|
@@ -306,6 +308,7 @@ def evaluate(batch):
|
|
306 |
# decode_string: We replace the encoded letter with the initial letters
|
307 |
batch["pred_strings"] = [decode_string(x) for x in batch["pred_strings"]]
|
308 |
return batch
|
|
|
309 |
result = test_dataset.map(evaluate, batched=True, batch_size=8)
|
310 |
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
|
311 |
```
|
|
|
170 |
from datasets import load_dataset, load_metric
|
171 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
172 |
import re
|
173 |
+
|
174 |
ENCODER = {
|
175 |
"ia ": "iê ",
|
176 |
"ìa ": "iề ",
|
|
|
273 |
"ưỡ": "\xa6",
|
274 |
"ượ": "\xa7",
|
275 |
}
|
276 |
+
|
277 |
def decode_string(x):
|
278 |
for k, v in list(reversed(list(ENCODER.items()))):
|
279 |
x = x.replace(v, k)
|
280 |
return x
|
281 |
+
|
282 |
test_dataset = load_dataset("common_voice", "vi", split="test")
|
283 |
wer = load_metric("wer")
|
284 |
+
processor = Wav2Vec2Processor.from_pretrained("Nhut/wav2vec2-large-xlsr-vietnamese")
|
285 |
+
model = Wav2Vec2ForCTC.from_pretrained("Nhut/wav2vec2-large-xlsr-vietnamese")
|
286 |
model.to("cuda")
|
287 |
|
288 |
chars_to_ignore_regex = '[\\\+\@\ǀ\,\?\.\!\-\;\:\"\“\%\‘\”\�]'
|
|
|
296 |
batch["speech"] = resampler(speech_array).squeeze().numpy()
|
297 |
return batch
|
298 |
|
299 |
+
test_dataset = test_dataset.map(speech_file_to_array_fn)
|
300 |
# Preprocessing the datasets.
|
301 |
# We need to read the audio files as arrays
|
302 |
def evaluate(batch):
|
|
|
308 |
# decode_string: We replace the encoded letter with the initial letters
|
309 |
batch["pred_strings"] = [decode_string(x) for x in batch["pred_strings"]]
|
310 |
return batch
|
311 |
+
|
312 |
result = test_dataset.map(evaluate, batched=True, batch_size=8)
|
313 |
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
|
314 |
```
|