codesnippets / create_confidence_scores.py
patrickvonplaten's picture
up
685ce0f
raw history blame
No virus
1.18 kB
#!/usr/bin/env python3
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
from datasets import load_dataset
import datasets
import torch
# Load the CTC model and its processor (feature extractor + tokenizer).
# NOTE(review): the checkpoint name says data2vec-audio while the class is
# Wav2Vec2ForCTC; confirm all weights load without "newly initialized"
# warnings, otherwise the scores below are not meaningful.
model = Wav2Vec2ForCTC.from_pretrained("facebook/data2vec-audio-base-10m")
processor = Wav2Vec2Processor.from_pretrained("facebook/data2vec-audio-base-10m")

# Load the dataset split and have the audio column resampled to 16 kHz.
minds14 = load_dataset("PolyAI/minds14", "en-US", split="train")
minds14 = minds14.cast_column("audio", datasets.Audio(sampling_rate=16_000))

# Look the first example's audio up once instead of once per field.
first_audio = minds14[0]["audio"]
input_values = processor(
    first_audio["array"],
    return_tensors="pt",
    sampling_rate=first_audio["sampling_rate"],
).input_values

# Inference only: no autograd graph is needed for the forward pass.
with torch.no_grad():
    logits = model(input_values).logits

# Normalise over the last axis and pick the highest-scoring id per position.
scores = torch.nn.functional.softmax(logits, dim=-1)
pred_ids = torch.argmax(logits, dim=-1)

# Probability of the chosen id at every position. The gather has to run along
# the LAST axis (the one softmax/argmax were taken over); gathering along
# axis 1 would use the predicted ids as position indices and return unrelated
# values (or raise when an id exceeds the number of positions).
pred_scores = scores.gather(-1, pred_ids.unsqueeze(-1))[:, :, 0]

# Decode the ids to text and request per-word start/end offsets.
output = processor.batch_decode(pred_ids, output_word_offsets=True)
# add confidence
def confidence_score(word_dict, frame_scores=None):
    """Return the mean score over the span covered by one decoded word.

    Args:
        word_dict: Mapping with ``"start_offset"`` and ``"end_offset"``
            entries; they are used as the half-open slice
            ``[start_offset:end_offset]`` along the second axis of the scores.
        frame_scores: Optional tensor indexed as ``[batch, position]``.
            When omitted, the module-level ``pred_scores`` is used.

    Returns:
        A 0-dim tensor with the mean of the selected scores of the first
        batch item (``torch.mean`` of an empty slice yields NaN).
    """
    if frame_scores is None:
        # Resolved at call time so the script-level tensor is picked up.
        frame_scores = pred_scores
    probs = frame_scores[0, word_dict["start_offset"]:word_dict["end_offset"]]
    return torch.mean(probs)
# Map each decoded word of the first batch item to its confidence.
# NOTE: the mapping is keyed by the word text, so a word that occurs more
# than once keeps only the score of its last occurrence.
word_confidences = {}
for word_info in output.word_offsets[0]:
    word_confidences[word_info["word"]] = confidence_score(word_info)
output["confidence_scores"] = word_confidences
print(output["confidence_scores"])