"""Gradio demo: transcribe speech from a YouTube URL or from the microphone.

Transcription is done by the ``Valdimarb13/whisper-small-icelandic`` model,
loaded once at import time through the ``transformers`` pipeline API.
"""

import os
import tempfile
from typing import Optional

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# Loaded once at module import so every request reuses the same model.
pipe = pipeline(model = "Valdimarb13/whisper-small-icelandic")


def transcribe(audio: Optional[str]) -> str:
    """Return the transcription of the audio file at path *audio*.

    Args:
        audio: Path to an audio file. The microphone component is configured
            with ``type="filepath"``, so Gradio passes a path (or ``None``
            when nothing was recorded).

    Raises:
        gr.Error: If no audio was supplied.
    """
    if not audio:
        # Without this guard the pipeline is called with None and the user
        # only sees a generic failure.
        raise gr.Error("No audio received. Please record something first.")
    return pipe(audio)["text"]


def get_audio(url: str, output_path: str = ".") -> str:
    """Download the first audio-only stream of a YouTube video.

    The downloaded file is renamed to carry an ``.mp3`` extension. This is
    only a rename; the audio data is not transcoded.

    Args:
        url: URL of the YouTube video.
        output_path: Directory to download into. Defaults to the current
            working directory.

    Returns:
        Path of the renamed ``.mp3`` file.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    stream = YouTube(url).streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream available for {url!r}")

    downloaded = stream.download(output_path=output_path)
    base, _ext = os.path.splitext(downloaded)
    mp3_path = base + '.mp3'
    # os.replace overwrites an existing target on every platform, whereas
    # os.rename fails on Windows when the same video was fetched before.
    os.replace(downloaded, mp3_path)
    return mp3_path


def get_text(url: str) -> str:
    """Download the audio of the video at *url* and return its transcription.

    The audio is downloaded into a temporary directory that is removed once
    the transcription is finished, so repeated requests do not accumulate
    files on disk.

    Raises:
        gr.Error: If *url* is empty or the video has no audio-only stream.
    """
    if not url or not url.strip():
        raise gr.Error("Please enter a YouTube video URL.")

    with tempfile.TemporaryDirectory() as workdir:
        try:
            audio_path = get_audio(url.strip(), output_path=workdir)
        except ValueError as err:
            # Surface the reason in the UI instead of a generic error.
            raise gr.Error(str(err)) from err
        return transcribe(audio_path)


with gr.Blocks() as demo:
    with gr.Tab('Enter the URL of a video with Icelandic speech to get a transcription'):
        with gr.Row():
            input_text = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
            output_text = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
        youtube_button = gr.Button('Get Transcription')

    with gr.Tab("Speak into the microphone to get a transcription"):
        with gr.Row():
            input_speech = gr.Audio(source="microphone", type="filepath")
            # The text is a placeholder/label, matching the YouTube tab; passing
            # it positionally would pre-fill the box with it as its value.
            output_speech = gr.Textbox(placeholder="Transcribed audio", label="Transcription")
        speech_button = gr.Button("Submit")

    youtube_button.click(get_text, inputs = input_text, outputs = output_text)
    speech_button.click(transcribe, inputs = input_speech, outputs = output_speech)

demo.launch()