from transformers import pipeline
import gradio as gr
from pytube import YouTube


def dl_youtube_audio(youtube_link):
    # Download the audio-only stream of the video and return the local file path.
    yt = YouTube(youtube_link)
    audio_stream = yt.streams.filter(only_audio=True).first()
    audio_path = f"youtube_audio_{yt.video_id}.mp3"
    # Save the file under the same name that we return.
    audio_stream.download(output_path=".", filename=audio_path)
    return audio_path


def transcribe_audio(audio_path):
    # Load the fine-tuned ASR checkpoint and run it on the audio file.
    model = pipeline(model="SofiaK/checkpoints")
    return model(audio_path)["text"]


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            radio = gr.Radio(
                choices=["Audio", "Youtube"],
                label="Choose your input type",
                value="Audio",
            )
            audio_input = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Upload Audio, or speak in the microphone",
                visible=True,
                interactive=True,
            )
            youtube_input = gr.Textbox(
                value="https://www.youtube.com/",
                label="Youtube Link",
                visible=False,
                interactive=True,
            )
            btn = gr.Button("Transcribe")
        with gr.Column():
            output = gr.Text(label="Model output")

    def make_visible(input_type):
        # Show the component matching the selected input type and hide the other.
        audio_visible = input_type == "Audio"
        return {
            audio_input: gr.update(visible=audio_visible),
            youtube_input: gr.update(visible=not audio_visible),
        }

    radio.change(make_visible, inputs=radio, outputs=[audio_input, youtube_input])

    def on_button_click(input_type, audio_path, youtube_link):
        # Gradio passes the values of the listed input components as arguments,
        # and the return value is written to the output component.
        if input_type == "Youtube":
            audio_path = dl_youtube_audio(youtube_link)
        return transcribe_audio(audio_path)

    btn.click(
        fn=on_button_click,
        inputs=[radio, audio_input, youtube_input],
        outputs=output,
    )

demo.launch()