import gradio as gr
import numpy as np
from transformers import pipeline

demo = gr.Blocks()

# Whisper base (English-only) checkpoint for automatic speech recognition.
transcriber = pipeline(
    "automatic-speech-recognition", model="openai/whisper-base.en"
)


def transcribe_speech(filepath):
    """Transcribe a short audio clip (up to ~30 seconds)."""
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    output = transcriber(filepath)
    return output["text"]


def transcribe_long_form(filepath):
    """Transcribe audio of any length by chunking it into 30 s windows."""
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    output = transcriber(
        filepath,
        max_new_tokens=256,
        chunk_length_s=30,  # split long audio into 30 s chunks
        batch_size=8,       # process several chunks per forward pass
    )
    return output["text"]


# Both interfaces use the long-form function so recordings longer than
# 30 seconds are handled; transcribe_speech is the short-clip variant.
mic_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# To launch on a fixed port (e.g. in a hosted environment), uncomment the
# line below and add `import os` at the top of the file:
# demo.launch(share=True, server_port=int(os.environ["PORT1"]))
demo.launch()