comodoro committed on
Commit
27bf67c
1 Parent(s): d3ffde3

Fix eval script and readme

Browse files
Files changed (2) hide show
  1. README.md +3 -3
  2. eval.py +1 -1
README.md CHANGED
@@ -22,10 +22,10 @@ model-index:
22
  metrics:
23
  - name: Test WER
24
  type: wer
25
- value: 55.2
26
  - name: Test CER
27
  type: cer
28
- value: 14.4
29
  ---
30
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
31
  should probably proofread and complete it, then remove this comment. -->
@@ -77,7 +77,7 @@ print("Reference:", test_dataset[:2]["sentence"])
77
 
78
  The model can be evaluated using the attached `eval.py` script:
79
  ```
80
- python eval.py --model_id comodoro/wav2vec2-xls-r-300m-sk-cv8 --dataset mozilla-foundation/common-voice_8_0 --split test --config sk
81
  ```
82
 
83
  ## Training and evaluation data
 
22
  metrics:
23
  - name: Test WER
24
  type: wer
25
+ value: 59.5
26
  - name: Test CER
27
  type: cer
28
+ value: 15.6
29
  ---
30
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
31
  should probably proofread and complete it, then remove this comment. -->
 
77
 
78
  The model can be evaluated using the attached `eval.py` script:
79
  ```
80
+ python eval.py --model_id comodoro/wav2vec2-xls-r-300m-sk-cv8 --dataset mozilla-foundation/common_voice_8_0 --split test --config sk
81
  ```
82
 
83
  ## Training and evaluation data
eval.py CHANGED
@@ -91,7 +91,7 @@ def normalize_text(text: str) -> str:
91
  text = unicodedata.normalize('NFKC', text)
92
  # remove punctuation
93
  text = re.sub(chars_to_ignore_regex, "", text)
94
- batch["sentence"] = replace_chars(batch['sentence'])
95
 
96
  # Let's also make sure we split on all kinds of newlines, spaces, etc...
97
  text = " ".join(text.split())
 
91
  text = unicodedata.normalize('NFKC', text)
92
  # remove punctuation
93
  text = re.sub(chars_to_ignore_regex, "", text)
94
+ text = replace_chars(text)
95
 
96
  # Let's also make sure we split on all kinds of newlines, spaces, etc...
97
  text = " ".join(text.split())