jonatasgrosman
committed on
Commit
•
a915467
1
Parent(s):
8f49fca
update README
Browse files
README.md
CHANGED
@@ -24,11 +24,10 @@ model-index:
|
|
24 |
metrics:
|
25 |
- name: Test WER
|
26 |
type: wer
|
27 |
-
value:
|
28 |
- name: Test CER
|
29 |
type: cer
|
30 |
-
value: 3.
|
31 |
-
|
32 |
---
|
33 |
|
34 |
# Wav2Vec2-Large-XLSR-53-German
|
@@ -102,15 +101,12 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
102 |
LANG_ID = "de"
|
103 |
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-german"
|
104 |
DEVICE = "cuda"
|
105 |
-
MAX_SAMPLES = 8000
|
106 |
|
107 |
CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
|
108 |
"؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
|
109 |
"=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
|
110 |
|
111 |
test_dataset = load_dataset("common_voice", LANG_ID, split="test")
|
112 |
-
if len(test_dataset) > MAX_SAMPLES:
|
113 |
-
test_dataset = test_dataset.select(range(MAX_SAMPLES))
|
114 |
|
115 |
wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
|
116 |
cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
|
@@ -153,5 +149,5 @@ print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"],
|
|
153 |
|
154 |
**Test Result**:
|
155 |
|
156 |
-
- WER:
|
157 |
-
- CER: 3.
|
|
|
24 |
metrics:
|
25 |
- name: Test WER
|
26 |
type: wer
|
27 |
+
value: 11.85
|
28 |
- name: Test CER
|
29 |
type: cer
|
30 |
+
value: 3.17
|
|
|
31 |
---
|
32 |
|
33 |
# Wav2Vec2-Large-XLSR-53-German
|
|
|
101 |
LANG_ID = "de"
|
102 |
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-german"
|
103 |
DEVICE = "cuda"
|
|
|
104 |
|
105 |
CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
|
106 |
"؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
|
107 |
"=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
|
108 |
|
109 |
test_dataset = load_dataset("common_voice", LANG_ID, split="test")
|
|
|
|
|
110 |
|
111 |
wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
|
112 |
cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
|
|
|
149 |
|
150 |
**Test Result**:
|
151 |
|
152 |
+
- WER: 11.85%
|
153 |
+
- CER: 3.17%
|