tanmaylaud
committed on
Commit
•
42e9975
1
Parent(s):
0a1bc74
Update README.md
Browse files
README.md
CHANGED
@@ -25,7 +25,7 @@ model-index:
|
|
25 |
metrics:
|
26 |
- name: Test WER
|
27 |
type: wer
|
28 |
-
value:
|
29 |
---
|
30 |
|
31 |
# Wav2Vec2-Large-XLSR-53-Hindi-Marathi
|
@@ -103,7 +103,7 @@ import re
|
|
103 |
test = Dataset.from_csv('test.csv')
|
104 |
|
105 |
|
106 |
-
chars_to_ignore_regex = '[
|
107 |
|
108 |
# Preprocessing the datasets.
|
109 |
# We need to read the audio files as arrays
|
@@ -145,7 +145,7 @@ import numpy as np
|
|
145 |
import re
|
146 |
from datasets import load_dataset
|
147 |
|
148 |
-
chars_to_ignore_regex = '[
|
149 |
|
150 |
# Preprocessing the datasets.
|
151 |
# We need to read the audio files as arrays
|
@@ -179,7 +179,7 @@ print("WER: {:2f}".format(100 * wer.compute(predictions=test_data["pred_strings"
|
|
179 |
|
180 |
Link to eval notebook : https://colab.research.google.com/drive/1nZRTgKfxCD9cvy90wikTHkg2il3zgcqW#scrollTo=cXWFbhb0d7DT
|
181 |
|
182 |
-
WER :
|
183 |
|
184 |
|
185 |
-
WER:
|
25 |
metrics:
|
26 |
- name: Test WER
|
27 |
type: wer
|
28 |
+
value: 23.736641
|
29 |
---
|
30 |
|
31 |
# Wav2Vec2-Large-XLSR-53-Hindi-Marathi
|
103 |
test = Dataset.from_csv('test.csv')
|
104 |
|
105 |
|
106 |
+
chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\।]'
|
107 |
|
108 |
# Preprocessing the datasets.
|
109 |
# We need to read the audio files as arrays
|
145 |
import re
|
146 |
from datasets import load_dataset
|
147 |
|
148 |
+
chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\।]'
|
149 |
|
150 |
# Preprocessing the datasets.
|
151 |
# We need to read the audio files as arrays
|
179 |
|
180 |
Link to eval notebook : https://colab.research.google.com/drive/1nZRTgKfxCD9cvy90wikTHkg2il3zgcqW#scrollTo=cXWFbhb0d7DT
|
181 |
|
182 |
+
WER : 23.736641% (OpenSLR Hindi+Marathi Test set : https://filebin.net/snrz6bt13usv8w2e/test_large.csv)
|
183 |
|
184 |
|
185 |
+
WER: 44.083527% (Common Voice Hindi Test Split)
|