tanmaylaud
committed on
Commit
•
6750fe1
1
Parent(s):
0c60532
Update README.md
Browse files
README.md
CHANGED
@@ -32,7 +32,9 @@ model-index:
|
|
32 |
Fine-tuned facebook/wav2vec2-large-xlsr-53 on Hindi and Marathi using the OpenSLR SLR64 datasets. When using this model, make sure that your speech input is sampled at 16kHz.
|
33 |
|
34 |
## Installation
|
|
|
35 |
pip install git+https://github.com/huggingface/transformers.git datasets librosa torch==1.7.0 torchaudio==0.7.0 jiwer
|
|
|
36 |
|
37 |
## Eval dataset:
|
38 |
```bash
|
@@ -99,7 +101,7 @@ import re
|
|
99 |
test = Dataset.from_csv('test.csv')
|
100 |
|
101 |
|
102 |
-
chars_to_ignore_regex = '[
|
103 |
|
104 |
# Preprocessing the datasets.
|
105 |
# We need to read the audio files as arrays
|
@@ -139,7 +141,7 @@ import numpy as np
|
|
139 |
import re
|
140 |
from datasets import load_dataset
|
141 |
|
142 |
-
chars_to_ignore_regex = '[
|
143 |
|
144 |
# Preprocessing the datasets.
|
145 |
# We need to read the audio files as arrays
|
|
|
32 |
Fine-tuned facebook/wav2vec2-large-xlsr-53 on Hindi and Marathi using the OpenSLR SLR64 datasets. When using this model, make sure that your speech input is sampled at 16kHz.
|
33 |
|
34 |
## Installation
|
35 |
+
```bash
|
36 |
pip install git+https://github.com/huggingface/transformers.git datasets librosa torch==1.7.0 torchaudio==0.7.0 jiwer
|
37 |
+
```
|
38 |
|
39 |
## Eval dataset:
|
40 |
```bash
|
|
|
101 |
test = Dataset.from_csv('test.csv')
|
102 |
|
103 |
|
104 |
+
chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“\\\\%\\\\‘\\\\”\\\\�\\\\।]'
|
105 |
|
106 |
# Preprocessing the datasets.
|
107 |
# We need to read the audio files as arrays
|
|
|
141 |
import re
|
142 |
from datasets import load_dataset
|
143 |
|
144 |
+
chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“\\\\%\\\\‘\\\\”\\\\�\\\\।]'
|
145 |
|
146 |
# Preprocessing the datasets.
|
147 |
# We need to read the audio files as arrays
|