tanmaylaud committed on
Commit
c68f981
1 Parent(s): 3d955d1

updated readme

Browse files
Files changed (1) hide show
  1. README.md +7 -4
README.md CHANGED
@@ -76,8 +76,9 @@ import numpy as np
76
  import re
77
 
78
  wer = load_metric("wer")
79
- processor = Wav2Vec2Processor.from_pretrained(output_models_dir)
80
- model = Wav2Vec2ForCTC.from_pretrained(output_models_dir+'/'+checkpoint)
 
81
  model.to("cuda")
82
 
83
  chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\।]'
@@ -90,7 +91,7 @@ def speech_file_to_array_fn(batch):
90
  batch["speech"] = speech_array[0].numpy()
91
  batch["sampling_rate"] = sampling_rate
92
  batch["target_text"] = batch["sentence"]
93
- batch["speech"] = librosa.resample(np.asarray(batch["speech"]), 8_000, 16_000)
94
  batch["sampling_rate"] = 16_000
95
  return batch
96
 
@@ -109,4 +110,6 @@ def evaluate(batch):
109
 
110
  result = test.map(evaluate, batched=True, batch_size=32)
111
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["text"])))
112
- ```
 
 
76
  import re
77
 
78
  wer = load_metric("wer")
79
+ processor = Wav2Vec2Processor.from_pretrained("tanmaylaud/wav2vec2-large-xlsr-hindi-marathi")
80
+ model = Wav2Vec2ForCTC.from_pretrained("tanmaylaud/wav2vec2-large-xlsr-hindi-marathi")
81
+
82
  model.to("cuda")
83
 
84
  chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\।]'
91
  batch["speech"] = speech_array[0].numpy()
92
  batch["sampling_rate"] = sampling_rate
93
  batch["target_text"] = batch["sentence"]
94
+ batch["speech"] = librosa.resample(np.asarray(batch["speech"]), sampling_rate, 16_000)
95
  batch["sampling_rate"] = 16_000
96
  return batch
97
 
110
 
111
  result = test.map(evaluate, batched=True, batch_size=32)
112
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["text"])))
113
+ ```
114
+
115
+ Link to the evaluation notebook: https://colab.research.google.com/drive/1nZRTgKfxCD9cvy90wikTHkg2il3zgcqW#scrollTo=cXWFbhb0d7DT