patrickvonplaten commited on
Commit
4d07fd7
1 Parent(s): 42e9975

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -3
README.md CHANGED
@@ -103,7 +103,7 @@ import re
103
  test = Dataset.from_csv('test.csv')
104
 
105
 
106
- chars_to_ignore_regex = '[\\\\\\\\\\\\\\\\,\\\\\\\\\\\\\\\\?\\\\\\\\\\\\\\\\.\\\\\\\\\\\\\\\\!\\\\\\\\\\\\\\\\-\\\\\\\\\\\\\\\\;\\\\\\\\\\\\\\\\:\\\\\\\\\\\\\\\\"\\\\\\\\\\\\\\\\“\\\\\\\\\\\\\\\\%\\\\\\\\\\\\\\\\‘\\\\\\\\\\\\\\\\”\\\\\\\\\\\\\\\\�\\\\\\\\\\\\\\\\।]'
107
 
108
  # Preprocessing the datasets.
109
  # We need to read the audio files as arrays
@@ -143,9 +143,14 @@ import torch
143
  import librosa
144
  import numpy as np
145
  import re
146
- from datasets import load_dataset
 
 
 
 
 
147
 
148
- chars_to_ignore_regex = '[\\\\\\\\\\\\\\\\,\\\\\\\\\\\\\\\\?\\\\\\\\\\\\\\\\.\\\\\\\\\\\\\\\\!\\\\\\\\\\\\\\\\-\\\\\\\\\\\\\\\\;\\\\\\\\\\\\\\\\:\\\\\\\\\\\\\\\\"\\\\\\\\\\\\\\\\“\\\\\\\\\\\\\\\\%\\\\\\\\\\\\\\\\‘\\\\\\\\\\\\\\\\”\\\\\\\\\\\\\\\\�\\\\\\\\\\\\\\\\।]'
149
 
150
  # Preprocessing the datasets.
151
  # We need to read the audio files as arrays
103
  test = Dataset.from_csv('test.csv')
104
 
105
 
106
+ chars_to_ignore_regex = '[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\,\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\?\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\.\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\!\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\-\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\:\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\"\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\“\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\%\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\‘\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\”\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\�\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\।]'
107
 
108
  # Preprocessing the datasets.
109
  # We need to read the audio files as arrays
143
  import librosa
144
  import numpy as np
145
  import re
146
+ from datasets import load_metric, load_dataset, Dataset
147
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
148
+
149
+ wer = load_metric("wer")
150
+ processor = Wav2Vec2Processor.from_pretrained('tanmaylaud/wav2vec2-large-xlsr-hindi-marathi')
151
+ model = Wav2Vec2ForCTC.from_pretrained('tanmaylaud/wav2vec2-large-xlsr-hindi-marathi').to("cuda")
152
 
153
+ chars_to_ignore_regex = '[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\,\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\?\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\.\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\!\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\-\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\:\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\"\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\“\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\%\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\‘\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\”\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\�\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\।]'
154
 
155
  # Preprocessing the datasets.
156
  # We need to read the audio files as arrays