from transformers import pipeline
import gradio as gr

# Whisper ASR pipeline; chunk_length_s=30 splits long recordings into
# 30-second windows so audio of arbitrary length can be transcribed.
model = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    chunk_length_s=30,
    generate_kwargs={"task": "transcribe"},
)


def transcribe_audio(mic=None, file=None, return_timestamps=False):
    # Prefer the microphone recording; fall back to an uploaded file.
    if mic is not None:
        audio = mic
    elif file is not None:
        audio = file
    else:
        return "You must either provide a mic recording or a file"
    result = model(audio, return_timestamps=return_timestamps, batch_size=8)
    if return_timestamps:
        return result["chunks"]
    return result["text"]


gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath"),
        gr.Audio(sources=["upload"], type="filepath"),
        gr.Checkbox(label="Add timestamps?"),
    ],
    outputs="text",
).launch()
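If you want to sanity-check the transcription function before launching the interface, you can call it directly on a local audio file, since the pipeline accepts file paths. This is a minimal sketch; `sample.wav` is a placeholder path, not a file shipped with this example:

def main():
    # "sample.wav" is a hypothetical local file used only for illustration.
    print(transcribe_audio(file="sample.wav"))
    # With timestamps enabled, the pipeline returns a list of chunk dicts,
    # each containing a "timestamp" tuple and its transcribed "text".
    print(transcribe_audio(file="sample.wav", return_timestamps=True))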