MengniWang
committed on
Commit
•
67f2594
1
Parent(s):
0b27f4e
add code
Browse files
README.md
CHANGED
@@ -48,6 +48,46 @@ Download the model by cloning the repository:
|
|
48 |
git clone https://huggingface.co/Intel/whisper-large-int8-dynamic
|
49 |
```
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
## Metrics (Model Performance):
|
52 |
| Model | Model Size (GB) | wer |
|
53 |
|---|:---:|:---:|
|
|
|
48 |
git clone https://huggingface.co/Intel/whisper-large-int8-dynamic
|
49 |
```
|
50 |
|
51 |
+
Evaluate the model with the code below:
|
52 |
+
```python
|
53 |
+
import os
|
54 |
+
from evaluate import load
|
55 |
+
from datasets import load_dataset
|
56 |
+
from transformers import WhisperForConditionalGeneration, WhisperProcessor, AutoConfig
|
57 |
+
|
58 |
+
model_name = 'openai/whisper-large'
|
59 |
+
model_path = 'whisper-large-int8-dynamic'
|
60 |
+
processor = WhisperProcessor.from_pretrained(model_name)
|
61 |
+
model = WhisperForConditionalGeneration.from_pretrained(model_name)
|
62 |
+
config = AutoConfig.from_pretrained(model_name)
|
63 |
+
wer = load("wer")
|
64 |
+
librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test")
|
65 |
+
|
66 |
+
from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
|
67 |
+
from transformers import PretrainedConfig
|
68 |
+
model_config = PretrainedConfig.from_pretrained(model_name)
|
69 |
+
predictions = []
|
70 |
+
references = []
|
71 |
+
sessions = ORTModelForSpeechSeq2Seq.load_model(
|
72 |
+
os.path.join(model_path, 'encoder_model.onnx'),
|
73 |
+
os.path.join(model_path, 'decoder_model.onnx'),
|
74 |
+
os.path.join(model_path, 'decoder_with_past_model.onnx'))
|
75 |
+
model = ORTModelForSpeechSeq2Seq(sessions[0], sessions[1], model_config, model_path, sessions[2])
|
76 |
+
for idx, batch in enumerate(librispeech_test_clean):
|
77 |
+
audio = batch["audio"]
|
78 |
+
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
|
79 |
+
reference = processor.tokenizer._normalize(batch['text'])
|
80 |
+
references.append(reference)
|
81 |
+
predicted_ids = model.generate(input_features)[0]
|
82 |
+
transcription = processor.decode(predicted_ids)
|
83 |
+
prediction = processor.tokenizer._normalize(transcription)
|
84 |
+
predictions.append(prediction)
|
85 |
+
wer_result = wer.compute(references=references, predictions=predictions)
|
86 |
+
print(f"Result wer: {wer_result * 100}")
|
87 |
+
accuracy = 1 - wer_result
|
88 |
+
print("Accuracy: %.5f" % accuracy)
|
89 |
+
```
|
90 |
+
|
91 |
## Metrics (Model Performance):
|
92 |
| Model | Model Size (GB) | wer |
|
93 |
|---|:---:|:---:|
|