patrickvonplaten
committed on
Commit
•
4d07fd7
1
Parent(s):
42e9975
Update README.md
Browse files
README.md
CHANGED
@@ -103,7 +103,7 @@ import re
|
|
103 |
test = Dataset.from_csv('test.csv')
|
104 |
|
105 |
|
106 |
-
chars_to_ignore_regex = '[
|
107 |
|
108 |
# Preprocessing the datasets.
|
109 |
# We need to read the audio files as arrays
|
@@ -143,9 +143,14 @@ import torch
|
|
143 |
import librosa
|
144 |
import numpy as np
|
145 |
import re
|
146 |
-
from datasets import load_dataset
|
|
|
|
|
|
|
|
|
|
|
147 |
|
148 |
-
chars_to_ignore_regex = '[
|
149 |
|
150 |
# Preprocessing the datasets.
|
151 |
# We need to read the audio files as arrays
|
103 |
test = Dataset.from_csv('test.csv')
|
104 |
|
105 |
|
106 |
+
chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\।]'
|
107 |
|
108 |
# Preprocessing the datasets.
|
109 |
# We need to read the audio files as arrays
|
143 |
import librosa
|
144 |
import numpy as np
|
145 |
import re
|
146 |
+
from datasets import load_metric, load_dataset, Dataset
|
147 |
+
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
148 |
+
|
149 |
+
wer = load_metric("wer")
|
150 |
+
processor = Wav2Vec2Processor.from_pretrained('tanmaylaud/wav2vec2-large-xlsr-hindi-marathi')
|
151 |
+
model = Wav2Vec2ForCTC.from_pretrained('tanmaylaud/wav2vec2-large-xlsr-hindi-marathi').to("cuda")
|
152 |
|
153 |
+
chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\।]'
|
154 |
|
155 |
# Preprocessing the datasets.
|
156 |
# We need to read the audio files as arrays
|