import os
import re

import gradio as gr
import pytube
from transformers import pipeline

pipe = pipeline(model="Yuyang2022/yue")  # change to "your-username/the-name-you-picked"


def transcribe(audio):
    text = pipe(audio)["text"]
    return text


def transcribe_video(link):
    # Download the audio-only stream of the YouTube video.
    video = link
    data = pytube.YouTube(video)
    audio = data.streams.get_audio_only()
    audio.download()
    # pytube saves the audio as an .mp4 file; find it in the working directory.
    pattern = r"\.mp4$"
    for name in os.listdir():
        if re.search(pattern, name) is not None:
            video = name
    text = pipe(video)["text"]
    return text


iface1 = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Whisper Base",
    description="Realtime demo for speech recognition using a fine-tuned Whisper-base model.",
)

iface2 = gr.Interface(
    fn=transcribe_video,
    inputs=gr.Textbox(label="YouTube Link", placeholder="YouTube Link"),
    outputs=["text"],
    title="Whisper Base",
    description="Asynchronous demo for YouTube speech recognition using a fine-tuned Whisper-base model.",
)

iface1.launch()
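# Note (an addition, not part of the original demo): iface2 is defined above but
# never launched. A minimal sketch, assuming you want to serve both demos from a
# single page, is to replace the iface1.launch() call above with Gradio's
# TabbedInterface (the tab names "Microphone" and "YouTube" are placeholders):
#
#     demo = gr.TabbedInterface([iface1, iface2], ["Microphone", "YouTube"])
#     demo.launch()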