# Gradio demo: transcribe speech from the microphone or an uploaded audio file
# using a Hugging Face `transformers` pipeline.

import gradio as gr
from transformers import pipeline

checkpoint = "openai/whisper-small"
# Built once at module level; `transcribe` reuses this object on every call.
pipe = pipeline(model=checkpoint)


def transcribe(microphone, file_upload):
    """Transcribe one audio input and return the recognised text.

    Both Gradio inputs below are declared with ``type='filepath'`` and
    ``optional=True``, so each argument is presumably a path string or
    ``None`` when that input was left empty (confirm against the installed
    Gradio version).

    Args:
        microphone: Audio recorded from the microphone, or ``None``.
        file_upload: Uploaded audio file, or ``None``.

    Returns:
        The ``"text"`` field of the pipeline result. When both inputs are
        supplied, the text is prefixed with a warning and the microphone
        recording is the one transcribed. When neither is supplied, an
        error message string is returned instead of raising.
    """
    # Nothing to transcribe: report it as text so it shows in the output box.
    if microphone is None and file_upload is None:
        return "ERROR: You have to either use the microphone or upload an audio file"

    warn_output = ""
    if microphone is not None and file_upload is not None:
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )

    # The microphone recording takes precedence whenever it is present.
    file = microphone if microphone is not None else file_upload

    text = pipe(file)["text"]
    return warn_output + text


iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type='filepath', optional=True),
        gr.inputs.Audio(source="upload", type='filepath', optional=True),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Whisper Speech Recognition Demo",
    description=f"Demo for speech recognition using the fine-tuned checkpoint: [{checkpoint}](https://huggingface.co/{checkpoint}).",
    allow_flagging='never',
)

iface.launch(enable_queue=True)