tanmaylaud committed on
Commit
42e9975
1 Parent(s): 0a1bc74

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -5
README.md CHANGED
@@ -25,7 +25,7 @@ model-index:
25
  metrics:
26
  - name: Test WER
27
  type: wer
28
- value: 24.944955
29
  ---
30
 
31
  # Wav2Vec2-Large-XLSR-53-Hindi-Marathi
@@ -103,7 +103,7 @@ import re
103
  test = Dataset.from_csv('test.csv')
104
 
105
 
106
- chars_to_ignore_regex = '[\\\\\\\\,\\\\\\\\?\\\\\\\\.\\\\\\\\!\\\\\\\\-\\\\\\\\;\\\\\\\\:\\\\\\\\"\\\\\\\\“\\\\\\\\%\\\\\\\\‘\\\\\\\\”\\\\\\\\�\\\\\\\\।]'
107
 
108
  # Preprocessing the datasets.
109
  # We need to read the audio files as arrays
@@ -145,7 +145,7 @@ import numpy as np
145
  import re
146
  from datasets import load_dataset
147
 
148
- chars_to_ignore_regex = '[\\\\\\\\,\\\\\\\\?\\\\\\\\.\\\\\\\\!\\\\\\\\-\\\\\\\\;\\\\\\\\:\\\\\\\\"\\\\\\\\“\\\\\\\\%\\\\\\\\‘\\\\\\\\”\\\\\\\\�\\\\\\\\।]'
149
 
150
  # Preprocessing the datasets.
151
  # We need to read the audio files as arrays
@@ -179,7 +179,7 @@ print("WER: {:2f}".format(100 * wer.compute(predictions=test_data["pred_strings"
179
 
180
  Link to eval notebook : https://colab.research.google.com/drive/1nZRTgKfxCD9cvy90wikTHkg2il3zgcqW#scrollTo=cXWFbhb0d7DT
181
 
182
- WER : 24.944955% (OpenSLR Hindi+Marathi Test set : https://filebin.net/snrz6bt13usv8w2e/test_large.csv)
183
 
184
 
185
- WER: 49.303944% (Common Voice Hindi Test Split)
25
  metrics:
26
  - name: Test WER
27
  type: wer
28
+ value: 23.736641
29
  ---
30
 
31
  # Wav2Vec2-Large-XLSR-53-Hindi-Marathi
103
  test = Dataset.from_csv('test.csv')
104
 
105
 
106
+ chars_to_ignore_regex = '[\\\\\\\\\\\\\\\\,\\\\\\\\\\\\\\\\?\\\\\\\\\\\\\\\\.\\\\\\\\\\\\\\\\!\\\\\\\\\\\\\\\\-\\\\\\\\\\\\\\\\;\\\\\\\\\\\\\\\\:\\\\\\\\\\\\\\\\"\\\\\\\\\\\\\\\\“\\\\\\\\\\\\\\\\%\\\\\\\\\\\\\\\\‘\\\\\\\\\\\\\\\\”\\\\\\\\\\\\\\\\�\\\\\\\\\\\\\\\\।]'
107
 
108
  # Preprocessing the datasets.
109
  # We need to read the audio files as arrays
145
  import re
146
  from datasets import load_dataset
147
 
148
+ chars_to_ignore_regex = '[\\\\\\\\\\\\\\\\,\\\\\\\\\\\\\\\\?\\\\\\\\\\\\\\\\.\\\\\\\\\\\\\\\\!\\\\\\\\\\\\\\\\-\\\\\\\\\\\\\\\\;\\\\\\\\\\\\\\\\:\\\\\\\\\\\\\\\\"\\\\\\\\\\\\\\\\“\\\\\\\\\\\\\\\\%\\\\\\\\\\\\\\\\‘\\\\\\\\\\\\\\\\”\\\\\\\\\\\\\\\\�\\\\\\\\\\\\\\\\।]'
149
 
150
  # Preprocessing the datasets.
151
  # We need to read the audio files as arrays
179
 
180
  Link to eval notebook : https://colab.research.google.com/drive/1nZRTgKfxCD9cvy90wikTHkg2il3zgcqW#scrollTo=cXWFbhb0d7DT
181
 
182
+ WER : 23.736641% (OpenSLR Hindi+Marathi Test set : https://filebin.net/snrz6bt13usv8w2e/test_large.csv)
183
 
184
 
185
+ WER: 44.083527% (Common Voice Hindi Test Split)