SLPL
/

Sharif-wav2vec2

Automatic Speech Recognition

Inference Endpoints

Model card Files Files and versions Community

SaraSadeghi committed on Sep 4, 2022

Commit

4a36ad9

·

1 Parent(s): feb891f

Update README.md

Files changed (1) hide show

README.md +44 -2

README.md CHANGED Viewed

@@ -76,7 +76,49 @@ print(prediction[0])
 ```
 ## Evaluation
 For the evaluation use the code below:
 ```python
 ?
@@ -86,7 +128,7 @@ For the evaluation use the code below:
 | clean | other |
 |---|---|
-| 3.4 | 8.6 |
 ## Citation

 ```
 ## Evaluation
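+# pip install datasets  (shell command: run in a terminal before this script)
+# pip install transformers  (shell command: run in a terminal before this script)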
+import torch
+import torchaudio
+import librosa
+from datasets import load_dataset,load_metric
+import numpy as np
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+from transformers import Wav2Vec2ProcessorWithLM
+model = Wav2Vec2ForCTC.from_pretrained("SLPL/Sharif-wav2vec2")  # CTC acoustic model; its logits are decoded in predict()
+processor = Wav2Vec2ProcessorWithLM.from_pretrained("SLPL/Sharif-wav2vec2")  # LM-backed processor loaded from the same repo id as the model
+def speech_file_to_array_fn(batch):
+    speech_array, sampling_rate = torchaudio.load(batch["path"])
+    speech_array = speech_array.squeeze().numpy()
+    speech_array = librosa.resample(np.asarray(speech_array), sampling_rate, processor.feature_extractor.sampling_rate)
+    batch["speech"] = speech_array
+    return batch
+def predict(batch):
+    features = processor(
+        batch["speech"],
+        sampling_rate=processor.feature_extractor.sampling_rate,
+        return_tensors="pt",
+        padding=True
+    )
+    input_values = features.input_values
+    attention_mask = features.attention_mask
+    with torch.no_grad():
+        logits = model(input_values, attention_mask=attention_mask).logits #when we are trying to load model with LM we have to use logits instead of argmax(logits)
+    batch["prediction"] = processor.batch_decode(logits.numpy()).text
+    return batch
+dataset = load_dataset("csv", data_files={"test":"path/to/your.csv"}, delimiter=",")["test"]
+dataset = dataset.map(speech_file_to_array_fn)
+result = dataset.map(predict, batched=True, batch_size=4)
+wer = load_metric("wer")
+cer = load_metric("cer")
+print("WER: {:.2f}".format(100 * wer.compute(predictions=result["prediction"], references=result["reference"])))
+print("CER: {:.2f}".format(100 * cer.compute(predictions=result["prediction"], references=result["reference"])))
 For the evaluation use the code below:
 ```python
 ?
 | clean | other |
 |---|---|
+| 6.0 | 16.4 |
 ## Citation