"""Gradio app: translate English text to Hindi and narrate it with Bark TTS.

Pipeline: English text -> NLLB-200 translation (Hindi) -> Bark text-to-speech,
served through a tabbed Gradio interface.
"""

from transformers import pipeline
import os
import gradio as gr
import torch

# Text-to-text translation (English -> Hindi) via NLLB-200.
translator = pipeline(
    task="translation",
    model="facebook/nllb-200-distilled-600M",
    torch_dtype=torch.bfloat16,
)

# Text-to-speech narration via Bark.
pipe = pipeline("text-to-speech", model="suno/bark-small")

demo = gr.Blocks()


def transcribe_speech(filepath):
    """Translate the input English text to Hindi and synthesize speech.

    Parameters
    ----------
    filepath : str
        English text from the Textbox. (Name kept for backward
        compatibility; the input is plain text, not a file path.)

    Returns
    -------
    tuple[int, numpy.ndarray] | None
        ``(sampling_rate, waveform)`` as expected by ``gr.Audio``,
        or ``None`` when the input is empty.
    """
    # Bug fix: a Textbox yields "" (not None) when empty, so the old
    # `is None` guard never fired. Truthiness covers both cases.
    if not filepath:
        gr.Warning("No audio found, please retry.")
        # Bug fix: returning "" to an audio output makes Gradio treat it
        # as a file path and fail; None renders an empty player instead.
        return None
    output = translator(filepath, src_lang="eng_Latn", tgt_lang="hin_Deva")
    narrated = pipe(output[0]['translation_text'])
    # Bug fix: the TTS pipeline returns {"audio": ndarray, "sampling_rate": int};
    # gr.Audio cannot consume that dict directly — hand it the
    # (sample_rate, 1-D waveform) tuple it expects.
    return (narrated["sampling_rate"], narrated["audio"].squeeze())


mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Textbox(label="text", lines=3),
    outputs="audio",
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Textbox(label="text", lines=3),
    outputs="audio",
    allow_flagging="never",
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch(share=True)
demo.close()