tanmaylaud
/

wav2vec2-large-xlsr-hindi-marathi

Automatic Speech Recognition

xlsr-fine-tuning-week

Inference Endpoints

Model card Files Files and versions Community

tanmaylaud committed on Mar 31, 2021

Commit

42e9975

•

1 Parent(s): 0a1bc74

Update README.md

Files changed (1) hide show

README.md +5 -5

README.md CHANGED Viewed

@@ -25,7 +25,7 @@ model-index:
     metrics:
        - name: Test WER
          type: wer
-         value: 24.944955
 ---
 # Wav2Vec2-Large-XLSR-53-Hindi-Marathi
@@ -103,7 +103,7 @@ import re
 test = Dataset.from_csv('test.csv')
-chars_to_ignore_regex = '[\\\\\\\\,\\\\\\\\?\\\\\\\\.\\\\\\\\!\\\\\\\\-\\\\\\\\;\\\\\\\\:\\\\\\\\"\\\\\\\\“\\\\\\\\%\\\\\\\\‘\\\\\\\\”\\\\\\\\�\\\\\\\\।]'
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
@@ -145,7 +145,7 @@ import numpy as np
 import re
 from datasets import load_dataset
-chars_to_ignore_regex = '[\\\\\\\\,\\\\\\\\?\\\\\\\\.\\\\\\\\!\\\\\\\\-\\\\\\\\;\\\\\\\\:\\\\\\\\"\\\\\\\\“\\\\\\\\%\\\\\\\\‘\\\\\\\\”\\\\\\\\�\\\\\\\\।]'
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
@@ -179,7 +179,7 @@ print("WER: {:2f}".format(100 * wer.compute(predictions=test_data["pred_strings"
 Link to eval notebook : https://colab.research.google.com/drive/1nZRTgKfxCD9cvy90wikTHkg2il3zgcqW#scrollTo=cXWFbhb0d7DT
-WER :  24.944955% (OpenSLR Hindi+Marathi Test set : https://filebin.net/snrz6bt13usv8w2e/test_large.csv)
-WER: 49.303944% (Common Voice Hindi Test Split)

     metrics:
        - name: Test WER
          type: wer
+         value: 23.736641
 ---
 # Wav2Vec2-Large-XLSR-53-Hindi-Marathi
 test = Dataset.from_csv('test.csv')
+chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\।]'
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 import re
 from datasets import load_dataset
+chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\।]'
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 Link to eval notebook : https://colab.research.google.com/drive/1nZRTgKfxCD9cvy90wikTHkg2il3zgcqW#scrollTo=cXWFbhb0d7DT
+WER :  23.736641% (OpenSLR Hindi+Marathi Test set : https://filebin.net/snrz6bt13usv8w2e/test_large.csv)
+WER: 44.083527% (Common Voice Hindi Test Split)