"""Gradio demo app: Italian speech recognition with a fine-tuned Whisper model.

Exposes three tabs that all feed the same ASR pipeline: microphone recording,
audio file upload, and transcription of the audio track of a YouTube video.
"""
import os
import tempfile

import gradio as gr
import pytube as pt
import torch
from transformers import pipeline

checkpoint = "GGmorello/whisper-small-it"
# Use CUDA device 0 when available, otherwise fall back to the CPU.
device = 0 if torch.cuda.is_available() else "cpu"
print(device)

# Single shared pipeline instance used by every tab.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=checkpoint,
    chunk_length_s=30,
    device=device,
)


def transcribe(audio):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        audio: Path to the audio file; the ``gr.Audio`` inputs below are
            configured with ``type="filepath"``.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If no audio was provided.
    """
    # Fail with a readable message instead of an opaque pipeline error when
    # the user submits without recording or uploading anything.
    if not audio:
        raise gr.Error("No audio provided. Please record or upload an audio clip.")
    return pipe(audio)["text"]


def transcribe_url(yt_url):
    """Download the audio of a YouTube video and transcribe it.

    Args:
        yt_url: URL of the YouTube video.

    Returns:
        The ``"text"`` entry of the pipeline output for the downloaded audio.

    Raises:
        gr.Error: If the URL is empty or the video has no audio-only stream.
    """
    if not yt_url or not yt_url.strip():
        raise gr.Error("Please provide a YouTube URL.")

    yt = pt.YouTube(yt_url.strip())
    # .first() returns None on an empty query, whereas indexing with [0]
    # would raise a bare IndexError.
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise gr.Error("No audio-only stream is available for this video.")

    # Download into a per-call temporary directory: a fixed "audio.mp3" in the
    # working directory would be overwritten by concurrent requests and was
    # never cleaned up.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir, filename="audio.mp3")
        return pipe(audio_path)["text"]


demo = gr.Blocks()

microphone_interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="text",
    title="Whisper Small Italian Finetuned raw microphone audio",
    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
)

file_interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs="text",
    title="Whisper Small Italian Finetuned for audio file.",
    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
)

url_interface = gr.Interface(
    fn=transcribe_url,
    inputs=gr.Textbox(
        lines=1,
        placeholder="Paste the URL to a YouTube video here",
        label="YouTube URL",
    ),
    outputs="text",
    title="Whisper Small Italian Finetuned for URL transcription",
    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
)

with demo:
    gr.TabbedInterface(
        [microphone_interface, file_interface, url_interface],
        ["Transcribe Audio", "Transcribe File", "Transcribe YouTube"],
    )

demo.launch(share=True)