patrickvonplaten committed on
Commit
266740b
1 Parent(s): fd6acee

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +44 -1
README.md CHANGED
@@ -60,4 +60,47 @@ To transcribe audio files the model can be used as a standalone acoustic model a
60
  # take argmax and decode
61
  predicted_ids = torch.argmax(logits, dim=-1)
62
  transcription = tokenizer.batch_decode(predicted_ids)
63
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  # take argmax and decode
61
  predicted_ids = torch.argmax(logits, dim=-1)
62
  transcription = tokenizer.batch_decode(predicted_ids)
63
+ ```
64
+
65
+ ## Evaluation
66
+
67
+ This is a short script showing how the model can be evaluated on the LibriSpeech "clean" and "other" test sets.
68
+
69
+ ```python
70
+ from datasets import load_dataset
71
+ from transformers import Wav2Vec2ForMaskedLM, Wav2Vec2Tokenizer
72
+ import soundfile as sf
73
+ import torch
74
+ from jiwer import wer
75
+
76
+
77
+ librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
78
+
79
+ model = Wav2Vec2ForMaskedLM.from_pretrained("facebook/wav2vec2-base-960h").to("cuda")
80
+ tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
81
+
82
+ def map_to_array(batch):
83
+ speech, _ = sf.read(batch["file"])
84
+ batch["speech"] = speech
85
+ return batch
86
+
87
+ librispeech_eval = librispeech_eval.map(map_to_array)
88
+
89
+ def map_to_pred(batch):
90
+ input_values = tokenizer(batch["speech"], return_tensors="pt", padding="longest").input_values
91
+ with torch.no_grad():
92
+ logits = model(input_values.to("cuda")).logits
93
+
94
+ predicted_ids = torch.argmax(logits, dim=-1)
95
+ transcription = tokenizer.batch_decode(predicted_ids)
96
+ batch["transcription"] = transcription
97
+ return batch
98
+
99
+ result = librispeech_eval.map(map_to_pred, batched=True, batch_size=16, remove_columns=["speech"])
100
+
101
+ print("WER:", wer(result["text"], result["transcription"]))
102
+ ```
103
+
104
+ | "clean" | "other" |
105
+ |---|---|
106
+ | 4.1 | |