Commit
·
b92cd0d
1
Parent(s):
d400ae7
adjust README
Browse files
README.md
CHANGED
@@ -78,9 +78,10 @@ The model can be evaluated as follows on the Hungarian test data of Common Voice
|
|
78 |
|
79 |
```python
|
80 |
import torch
|
|
|
|
|
81 |
from datasets import load_dataset, load_metric
|
82 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
83 |
-
import re
|
84 |
|
85 |
LANG_ID = "hu"
|
86 |
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-hungarian"
|
@@ -88,7 +89,7 @@ DEVICE = "cuda"
|
|
88 |
|
89 |
CHARS_TO_IGNORE = [",", "?", ".", "!", "-", ";", ":", '""', "%", "'", '"', "�", "ʿ", "·", "჻", "¿", "¡", "~", "՞",
|
90 |
"؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
|
91 |
-
"=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ"
|
92 |
|
93 |
test_dataset = load_dataset("common_voice", LANG_ID, split="test")
|
94 |
wer = load_metric("wer")
|
|
|
78 |
|
79 |
```python
|
80 |
import torch
|
81 |
+
import re
|
82 |
+
import librosa
|
83 |
from datasets import load_dataset, load_metric
|
84 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
|
85 |
|
86 |
LANG_ID = "hu"
|
87 |
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-hungarian"
|
|
|
89 |
|
90 |
CHARS_TO_IGNORE = [",", "?", ".", "!", "-", ";", ":", '""', "%", "'", '"', "�", "ʿ", "·", "჻", "¿", "¡", "~", "՞",
|
91 |
"؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
|
92 |
+
"=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ"]
|
93 |
|
94 |
test_dataset = load_dataset("common_voice", LANG_ID, split="test")
|
95 |
wer = load_metric("wer")
|