# Spaces: Runtime error
from transformers import pipeline | |
import gradio as gr | |
import whisper | |
# Speech-to-text backends, instantiated once when the module is imported.
# Two Wav2vec2 checkpoints (one per supported language) ...
wav2vec_en_model = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)
wav2vec_fr_model = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-french",
)
# ... and a single Whisper "base" checkpoint.
whisper_model = whisper.load_model("base")
def transcribe_audio(language=None, mic=None, file=None):
    """Transcribe one audio clip with both the Wav2vec pipeline and Whisper.

    Args:
        language: Language code. It selects the Wav2vec pipeline through
            ``load_models`` and is forwarded to Whisper as ``language``.
        mic: Audio coming from the microphone input. Takes precedence over
            ``file`` when both are provided.
        file: Audio coming from the upload input. Used only when ``mic`` is
            None.

    Returns:
        A ``(wav2vec_text, whisper_text)`` tuple. When neither ``mic`` nor
        ``file`` is provided, both elements hold the same error message so
        the result always has one value per output component.
    """
    # Console trace of the language selected for this request.
    print(language)

    # The microphone recording wins over the uploaded file.
    audio = mic if mic is not None else file
    if audio is None:
        # Two outputs are expected by the caller, so the message is returned
        # once per output instead of as a lone string.
        message = "You must either provide a mic recording or a file"
        return message, message

    wav2vec_model = load_models(language)
    wav2vec_text = wav2vec_model(audio)["text"]
    whisper_text = whisper_model.transcribe(audio, language=language)["text"]
    return wav2vec_text, whisper_text
def load_models(lang):
    """Return the Wav2vec pipeline to use for the language code *lang*.

    ``'fr'`` maps to the French pipeline; ``'en'`` and every other value
    (including None) map to the English pipeline, which is the default.
    """
    if lang == 'fr':
        return wav2vec_fr_model
    # 'en' and any unrecognised code share the English default.
    return wav2vec_en_model
# Static text shown around the demo: heading, introduction and footer.
title = "Speech2text comparison (Wav2vec vs Whisper)"
description = """
This Space allows easy comparisons for transcribed texts between Facebook's Wav2vec model and newly released OpenAI's Whisper model.\n
(Even if Whisper includes a language detection, here we have decided to select the language to speed up the computation and to focus only on the quality of the transcriptions. The default language is english)
"""
article = "Check out [the OpenAI Whisper model](https://github.com/openai/whisper) and [the Facebook Wav2vec model](https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/) that this demo is based off of."

# Clickable sample inputs. Each row supplies one value per input of the
# transcription function, in order: language, microphone audio, uploaded
# file. The microphone slot is None so the sample file is sent as the upload
# and the language selector receives a language code rather than a file name.
examples = [
    ["en", None, "english_sentence.flac"],
    ["fr", None, "2022-a-Droite-un-fauteuil-pour-trois-3034044.mp3000.mp3"],
]
# Assemble the UI: a language selector plus two optional audio inputs
# (microphone and upload) feed transcribe_audio, whose two results are shown
# in one textbox per model.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Radio(label="Language", choices=["en", "fr"], value="en"),
        gr.Audio(source="microphone", type="filepath", optional=True),
        gr.Audio(source="upload", type="filepath", optional=True),
    ],
    outputs=[
        gr.Textbox(label="facebook/wav2vec"),
        gr.Textbox(label="openai/whisper"),
    ],
    title=title,
    description=description,
    article=article,
    examples=examples,
)

# Start the app with debug mode enabled.
demo.launch(debug=True)