tanmaylaud
committed on
Commit
•
0c60532
1
Parent(s):
16efafc
Update README.md
Browse files
README.md
CHANGED
@@ -35,15 +35,16 @@ Fine-tuned facebook/wav2vec2-large-xlsr-53 on Hindi and Marathi using the OpenSL
|
|
35 |
pip install git+https://github.com/huggingface/transformers.git datasets librosa torch==1.7.0 torchaudio==0.7.0 jiwer
|
36 |
|
37 |
## Eval dataset:
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
47 |
## Usage
|
48 |
The model can be used directly (without a language model) as follows, assuming you have a dataset with Marathi text and path fields:
|
49 |
|
@@ -87,7 +88,7 @@ print("Prediction:", processor.batch_decode(predicted_ids))
|
|
87 |
print("Reference:", test_data["text"][:2])
|
88 |
```
|
89 |
|
90 |
-
#Code For Evaluation on OpenSLR (Hindi + Marathi : https://filebin.net/snrz6bt13usv8w2e/test_large.csv)
|
91 |
```python
|
92 |
import torchaudio
|
93 |
import torch
|
@@ -98,7 +99,7 @@ import re
|
|
98 |
test = Dataset.from_csv('test.csv')
|
99 |
|
100 |
|
101 |
-
chars_to_ignore_regex = '[
|
102 |
|
103 |
# Preprocessing the datasets.
|
104 |
# We need to read the audio files as arrays
|
@@ -138,7 +139,7 @@ import numpy as np
|
|
138 |
import re
|
139 |
from datasets import load_dataset
|
140 |
|
141 |
-
chars_to_ignore_regex = '[
|
142 |
|
143 |
# Preprocessing the datasets.
|
144 |
# We need to read the audio files as arrays
|
@@ -172,4 +173,5 @@ print("WER: {:2f}".format(100 * wer.compute(predictions=test_data["pred_strings"
|
|
172 |
|
173 |
Link to eval notebook : https://colab.research.google.com/drive/1nZRTgKfxCD9cvy90wikTHkg2il3zgcqW#scrollTo=cXWFbhb0d7DT
|
174 |
|
175 |
-
WER : 24.944955% (
|
|
35 |
pip install git+https://github.com/huggingface/transformers.git datasets librosa torch==1.7.0 torchaudio==0.7.0 jiwer
|
36 |
|
37 |
## Eval dataset:
|
38 |
+
```bash
|
39 |
+
wget https://www.openslr.org/resources/103/Marathi_test.zip -P data/marathi
|
40 |
+
unzip -P "K3[2?do9" data/marathi/Marathi_test.zip -d data/marathi/.
|
41 |
+
tar -xzf data/marathi/Marathi_test.tar.gz -C data/marathi/.
|
42 |
+
wget https://www.openslr.org/resources/103/Hindi_test.zip -P data/hindi
|
43 |
+
unzip -P "w9I2{3B*" data/hindi/Hindi_test.zip -d data/hindi/.
|
44 |
+
tar -xzf data/hindi/Hindi_test.tar.gz -C data/hindi/.
|
45 |
+
wget -O test.csv 'https://filebin.net/snrz6bt13usv8w2e/test_large.csv?t=ps3n99ho'
|
46 |
+
#If download does not work, paste this link in browser: https://filebin.net/snrz6bt13usv8w2e/test_large.csv
|
47 |
+
```
|
48 |
## Usage
|
49 |
The model can be used directly (without a language model) as follows, assuming you have a dataset with Marathi text and path fields:
|
50 |
|
88 |
print("Reference:", test_data["text"][:2])
|
89 |
```
|
90 |
|
91 |
+
# Code For Evaluation on OpenSLR (Hindi + Marathi: https://filebin.net/snrz6bt13usv8w2e/test_large.csv)
|
92 |
```python
|
93 |
import torchaudio
|
94 |
import torch
|
99 |
test = Dataset.from_csv('test.csv')
|
100 |
|
101 |
|
102 |
+
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“\\%\\‘\\”\\�\\।]'
|
103 |
|
104 |
# Preprocessing the datasets.
|
105 |
# We need to read the audio files as arrays
|
139 |
import re
|
140 |
from datasets import load_dataset
|
141 |
|
142 |
+
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“\\%\\‘\\”\\�\\।]'
|
143 |
|
144 |
# Preprocessing the datasets.
|
145 |
# We need to read the audio files as arrays
|
173 |
|
174 |
Link to eval notebook: https://colab.research.google.com/drive/1nZRTgKfxCD9cvy90wikTHkg2il3zgcqW#scrollTo=cXWFbhb0d7DT
|
175 |
|
176 |
+
WER: 24.944955% (OpenSLR Hindi+Marathi Test set : https://filebin.net/snrz6bt13usv8w2e/test_large.csv)
|
177 |
+
WER: 49.303944% (Common Voice Hindi Test Split)
|