"""Gradio demo that transcribes speech with Distil-Whisper.

Two tabs are exposed: one records from the microphone, the other accepts an
uploaded audio file. Both feed the same transcription function.
"""

from typing import Optional

import gradio as gr
from transformers import pipeline

# Built once at module load so every request reuses the same pipeline object.
asr = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)

demo = gr.Blocks()


def transcribe_long_form(filepath: Optional[str]) -> str:
    """Transcribe the audio file at *filepath* and return the recognized text.

    The pipeline is called with ``chunk_length_s=30`` and ``batch_size=8`` so
    that recordings longer than a single window can be handled.

    Args:
        filepath: Path to the audio file provided by the Gradio ``Audio``
            component, or ``None`` when nothing was submitted.

    Returns:
        The ``"text"`` field of the pipeline output, or an empty string when
        no audio was provided.
    """
    # Gradio passes None when the user submits without recording/uploading;
    # an empty path is equally unusable, so both are rejected here.
    if not filepath:
        gr.Warning("Please submit again <3 ")
        return ""

    output = asr(
        filepath,
        max_new_tokens=256,
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


def _build_interface(source: str) -> gr.Interface:
    """Return a transcription interface whose audio input uses *source*."""
    return gr.Interface(
        fn=transcribe_long_form,
        inputs=gr.Audio(sources=source, type="filepath"),
        outputs=gr.Textbox(label="Transcription", lines=3),
        allow_flagging="never",
    )


mic_transcribe = _build_interface("microphone")
file_transcribe = _build_interface("upload")

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# Only start the (publicly shared) server when executed as a script, so that
# importing this module does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch(share=True)