import gradio as gr
import torch
from transformers import pipeline

# Speech-to-text: a small, English-only distilled Whisper checkpoint.
asr = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)

# English -> Hindi translation with a distilled NLLB-200 model,
# loaded in bfloat16 to reduce memory use.
translator = pipeline(
    task="translation",
    model="facebook/nllb-200-distilled-600M",
    torch_dtype=torch.bfloat16,
)


def transcribe_speech(filepath):
    """Transcribe an audio file to English text, then translate it to Hindi."""
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    transcription = asr(filepath)["text"]
    # The translation pipeline returns a list of dicts; extract the text field.
    output = translator(transcription, src_lang="eng_Latn", tgt_lang="hin_Deva")
    return output[0]["translation_text"]


mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.Textbox(label="Translation", lines=3),
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Textbox(label="Translation", lines=3),
    allow_flagging="never",
)

demo = gr.Blocks()
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch(share=True)

# Stop the server once you are done with the demo.
demo.close()
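# A minimal sketch for sanity-checking the function outside the UI, assuming
# you have a local English audio clip ("sample.wav" is a hypothetical path):
#
#   print(transcribe_speech("sample.wav"))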