import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# Model pipeline shared by every request; it is built once at import time.
# To serve a different checkpoint, swap the id below for
# "your-username/the-name-you-picked".
pipe = pipeline(
    model="kk90ujhun/whisper-small-zh",
)

def transcribe(audio, url):
    """Transcribe speech from a YouTube URL or from an audio file path.

    A non-blank ``url`` takes precedence over ``audio``: the video's stream
    is downloaded with pytube into a temporary directory, transcribed, and
    removed again. Otherwise the file at ``audio`` is transcribed.

    Args:
        audio: Path to an audio file, or ``None`` when nothing was recorded.
        url: YouTube video URL; ``None``, empty or whitespace-only means
            "use ``audio`` instead".

    Returns:
        The ``"text"`` entry of the pipeline's result.

    Raises:
        ValueError: If neither a URL nor an audio file is supplied, or if
            the video exposes no downloadable stream.
    """
    url = url.strip() if url else ""

    if not url:
        if not audio:
            raise ValueError("Provide a microphone recording or a YouTube URL.")
        return pipe(audio)["text"]

    # Download into a throwaway directory so files do not pile up in the
    # working directory across requests.
    with tempfile.TemporaryDirectory() as workdir:
        streams = YouTube(url).streams
        # Only the audio track is needed; fall back to any stream if the
        # video offers no audio-only variant.
        stream = streams.filter(only_audio=True).first() or streams.first()
        if stream is None:
            raise ValueError(f"No downloadable stream found for {url!r}.")
        downloaded_path = stream.download(output_path=workdir)
        # Transcribe before leaving the with-block: the file is deleted on exit.
        return pipe(downloaded_path)["text"]

# Gradio UI: a microphone recorder plus a URL textbox feed `transcribe`, whose
# returned text is shown as plain text output.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        # type="filepath" hands the recording to `transcribe` as a file path.
        gr.Audio(source="microphone", type="filepath"),
        # Uses the top-level component API like gr.Audio above; the legacy
        # gr.inputs.Textbox(default=...) form is deprecated.
        # NOTE: the box is pre-filled, and `transcribe` prefers a non-empty
        # URL, so the URL must be cleared to transcribe a recording.
        gr.Textbox(
            label="give me an url",
            value="https://www.youtube.com/watch?v=YzGsIavAo_E",
        ),
    ],
    outputs="text",
    title="Whisper Small Chinese",
    description="Realtime demo for chinese speech recognition using a fine-tuned Whisper small model.",
)

# Start the web server for the demo.
iface.launch()