"""Gradio demo that transcribes Chinese speech with a fine-tuned Whisper model.

The audio comes either from a YouTube URL (downloaded with pytube) or from a
microphone recording made in the browser.
"""

import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# change to "your-username/the-name-you-picked"
pipe = pipeline(model="kk90ujhun/whisper-small-zh")


def transcribe(audio, url):
    """Return the transcription of a YouTube video or of a recorded clip.

    Args:
        audio: Path to the recorded audio file (Gradio passes a filepath),
            or ``None`` when nothing was recorded.
        url: YouTube URL. When non-empty it takes precedence over ``audio``.

    Returns:
        The text produced by the speech-recognition pipeline.

    Raises:
        ValueError: If neither a URL nor a recording was supplied, or if the
            video exposes no downloadable stream.
    """
    url = (url or "").strip()
    if url:
        streams = YouTube(url).streams
        # Prefer an audio-only stream so no video data is downloaded; fall
        # back to the first available stream when none is offered.
        stream = streams.get_audio_only() or streams.first()
        if stream is None:
            raise ValueError(f"No downloadable stream found for {url!r}.")
        # Download into a temporary directory so the file is removed once it
        # has been transcribed instead of piling up in the working directory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            downloaded_path = stream.download(output_path=tmp_dir)
            return pipe(downloaded_path)["text"]

    if not audio:
        raise ValueError(
            "Provide a YouTube URL or record audio with the microphone."
        )
    return pipe(audio)["text"]


iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        # The URL box is pre-filled; because a non-empty URL takes precedence
        # in transcribe(), it must be cleared to use the microphone input.
        gr.Textbox(
            label="give me an url",
            value="https://www.youtube.com/watch?v=YzGsIavAo_E",
        ),
    ],
    outputs="text",
    title="Whisper Small Chinese",
    description="Realtime demo for chinese speech recognition using a fine-tuned Whisper small model.",
)

iface.launch()