jonatasgrosman committed on
Commit
a915467
1 Parent(s): 8f49fca

update README

Browse files
Files changed (1) hide show
  1. README.md +4 -8
README.md CHANGED
@@ -24,11 +24,10 @@ model-index:
24
  metrics:
25
  - name: Test WER
26
  type: wer
27
- value: 13.32
28
  - name: Test CER
29
  type: cer
30
- value: 3.71
31
-
32
  ---
33
 
34
  # Wav2Vec2-Large-XLSR-53-German
@@ -102,15 +101,12 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
102
  LANG_ID = "de"
103
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-german"
104
  DEVICE = "cuda"
105
- MAX_SAMPLES = 8000
106
 
107
  CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
108
  "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
109
  "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
110
 
111
  test_dataset = load_dataset("common_voice", LANG_ID, split="test")
112
- if len(test_dataset) > MAX_SAMPLES:
113
- test_dataset = test_dataset.select(range(MAX_SAMPLES))
114
 
115
  wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
116
  cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
@@ -153,5 +149,5 @@ print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"],
153
 
154
  **Test Result**:
155
 
156
- - WER: 13.32%
157
- - CER: 3.71%
 
24
  metrics:
25
  - name: Test WER
26
  type: wer
27
+ value: 11.85
28
  - name: Test CER
29
  type: cer
30
+ value: 3.17
 
31
  ---
32
 
33
  # Wav2Vec2-Large-XLSR-53-German
 
101
  LANG_ID = "de"
102
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-german"
103
  DEVICE = "cuda"
 
104
 
105
  CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
106
  "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
107
  "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
108
 
109
  test_dataset = load_dataset("common_voice", LANG_ID, split="test")
 
 
110
 
111
  wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
112
  cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
 
149
 
150
  **Test Result**:
151
 
152
+ - WER: 11.85%
153
+ - CER: 3.17%