import torch
from transformers import pipeline

# NOTE(review): these environment variables only take effect if set BEFORE
# `transformers` is imported, and they would also require `import os`.
# Kept commented out, as in the original, but moved above the import so
# re-enabling them actually works.
# os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
# os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = '1'
# os.environ['TRANSFORMERS_VERBOSITY'] = 'error'

# Distil-Whisper checkpoint used for automatic speech recognition.
model_id = 'distil-whisper/distil-large-v2'

# Chunked long-form transcription; 15 s chunks as in the original config.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    chunk_length_s=15,
)
def score_audio(audio_path, true_result):
    """Transcribe an audio file and score it against accepted answers.

    Parameters
    ----------
    audio_path : str
        Path to the audio file handed to the ASR pipeline.
    true_result : str
        One or more acceptable answers separated by '/'.

    Returns
    -------
    dict
        ``{'transcription': <lowercased transcript>, 'score': 1 or 0}``
        where score is 1 if any accepted answer occurs as a
        case-insensitive substring of the transcript.
    """
    # Don't rebind the parameter; keep the accepted answers in their own name.
    accepted = true_result.split('/')
    transcription = pipe(audio_path)['text'].lower()
    # Generator instead of a throwaway list inside any(); both sides are
    # lowercased so the substring match is case-insensitive.
    # NOTE(review): an empty `true_result` splits to [''] and always scores 1
    # ('' is a substring of everything) — confirm that is intended.
    score = int(any(ans.lower() in transcription for ans in accepted))
    return {'transcription': transcription, 'score': score}