"""Gradio demo: transcribe microphone input, uploaded audio, or YouTube audio."""

import tempfile
from typing import Optional, Tuple

import gradio as gr
from pytube import YouTube
from pytube.exceptions import PytubeError
from transformers import pipeline

# Loaded once at import time so every request reuses the same model instance.
pipe = pipeline(model="Mei000/whisper-small-sv-SE")


def link_transcribe(link: str) -> str:
    """Download the audio track of a YouTube video and return its transcription.

    Args:
        link: URL of the YouTube video.

    Returns:
        The ``"text"`` field of the pipeline result for the downloaded audio.

    Raises:
        gr.Error: If no link is given, the video cannot be loaded, or it has
            no audio-only stream.
    """
    if not link or not link.strip():
        raise gr.Error("Please provide a YouTube link.")

    try:
        stream = YouTube(link).streams.filter(only_audio=True).first()
    except PytubeError as err:
        raise gr.Error(f"Could not load the YouTube video: {err}") from err

    if stream is None:
        raise gr.Error("No audio-only stream is available for this video.")

    # A per-call temporary directory keeps concurrent requests from
    # overwriting each other's download and removes the file afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = stream.download(output_path=tmp_dir, filename="tmp.mp4")
        return pipe(path)["text"]


def transcribe(audio: Optional[str]) -> str:
    """Transcribe an audio file and return the recognised text.

    Args:
        audio: Path to the audio file (the Audio components below use
            ``type="filepath"``); ``None`` when nothing was recorded/uploaded.

    Returns:
        The ``"text"`` field of the pipeline result.

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio is None:
        raise gr.Error("Please record or upload an audio clip first.")
    return pipe(audio)["text"]


def populate_metadata(link: str) -> Tuple[Optional[str], Optional[str]]:
    """Return ``(thumbnail_url, title)`` for a YouTube link.

    This runs on every change of the link textbox, so it regularly sees empty
    or partially typed URLs; in those cases ``(None, None)`` is returned so
    the preview is cleared instead of an exception being raised.

    Args:
        link: Current content of the YouTube link textbox.

    Returns:
        The video's thumbnail URL and title, or ``(None, None)`` when the link
        is empty or pytube cannot resolve it.
    """
    if not link or not link.strip():
        return None, None
    try:
        video = YouTube(link)
        return video.thumbnail_url, video.title
    except PytubeError:
        return None, None


with gr.Blocks() as demo:
    gr.Markdown("Whisper Small Swedish")

    # Microphone recording.
    with gr.Row():
        with gr.TabItem("Record from Microphone"):
            record_input = gr.Audio(
                source="microphone",
                type="filepath",
                label="Record from microphone",
            )
            record_button = gr.Button("Submit")
            record_outputs = [
                gr.Textbox(label="Transcription from Microphone"),
            ]

    # YouTube link with metadata preview and transcription output.
    with gr.Row().style(equal_height=True):
        link = gr.Textbox(label="YouTube Link")
        title = gr.Label(label="Video Title")
    with gr.Row().style(equal_height=True):
        img = gr.Image(label="Thumbnail")
        youtube_outputs = [
            gr.Textbox(
                label="Transcription",
                placeholder="Transcription Output",
                lines=10,
            ),
        ]
    with gr.Row().style(equal_height=True):
        youtube_button = gr.Button("Submit")

    # Uploaded audio file.
    with gr.Row():
        with gr.TabItem("Audio File"):
            offline_file = gr.Audio(
                source="upload",
                type="filepath",
                label="Upload An Audio File",
            )
            offline_upload = gr.Button("Submit")
            offline_outputs = [
                gr.Textbox(label="Transcription from uploaded audio file"),
            ]

    # Event wiring.
    record_button.click(
        fn=transcribe,
        inputs=record_input,
        outputs=record_outputs,
    )
    youtube_button.click(
        fn=link_transcribe,
        inputs=link,
        outputs=youtube_outputs,
    )
    offline_upload.click(
        fn=transcribe,
        inputs=offline_file,
        outputs=offline_outputs,
    )
    # Refresh thumbnail and title whenever the link text changes.
    link.change(fn=populate_metadata, inputs=[link], outputs=[img, title])

demo.launch()