# Author: Chandranshu Jain
# app.py — English speech -> Hindi translated speech (Gradio demo)
# (Hugging Face Space, commit 32c0cf2, 1.7 kB)
from transformers import pipeline
import os
import gradio as gr
import torch
from IPython.display import Audio as IPythonAudio
from gtts import gTTS
import IPython.display as ipd
# --- Model pipelines -------------------------------------------------------
# Stage 1: speech -> English text (distilled Whisper, English-only).
asr = pipeline(
    "automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)

# Stage 2: English text -> Hindi text (NLLB-200, bf16 to cut memory).
translator = pipeline(
    "translation",
    model="facebook/nllb-200-distilled-600M",
    torch_dtype=torch.bfloat16,
)

# Stage 3: text -> speech (Bark small).
pipe = pipeline("text-to-speech", model="suno/bark-small")

# Top-level Gradio container; the tabbed UI is attached to it further down.
demo = gr.Blocks()
def transcribe_speech(filepath):
    """Transcribe English speech, translate it to Hindi, and return Hindi audio.

    Pipeline: ASR (Whisper) -> translation (NLLB eng_Latn -> hin_Deva) ->
    speech synthesis (gTTS, Hindi).

    Args:
        filepath: Path to the recorded/uploaded audio file, or None when the
            user submitted without recording.

    Returns:
        Path to the generated MP3 (for the gr.Audio output component), or
        None when no input audio was provided.
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        # gr.Audio output expects a filepath or None, not "".
        return None

    # Speech -> English text.
    transcription = asr(filepath)["text"]

    # English -> Hindi. The translation pipeline returns a list of dicts.
    output = translator(transcription,
                        src_lang="eng_Latn",
                        tgt_lang="hin_Deva")
    translated_text = output[0]['translation_text']

    # BUG FIX: the original ran the Bark TTS pipeline and fed its output
    # (a dict containing a raw audio array) into gTTS as *text*, so gTTS
    # spoke the stringified dict. Feed the translated text directly.
    tts = gTTS(text=translated_text, lang='hi', slow=False)
    tts.save("translated_audio.mp3")

    # BUG FIX: return the filepath itself; gr.Audio cannot render an
    # IPython.display.Audio object (ipd.Audio is a notebook-only widget).
    return "translated_audio.mp3"
# --- UI definitions --------------------------------------------------------
# Two Interfaces share the same callback; they differ only in where the
# audio comes from (live microphone vs. an uploaded file).
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.Audio(label="Translated Message"),
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Audio(label="Translated Message"),
    allow_flagging="never",
)
# Assemble the two interfaces as tabs inside the Blocks container.
with demo:
    gr.TabbedInterface(
        [mic_transcribe,
         file_transcribe],
        ["Transcribe Microphone",
         "Transcribe Audio File"],
    )

# share=True exposes a public gradio.live URL in addition to the local one.
demo.launch(share=True)
# NOTE(review): launch() blocks by default in a script, so close() only runs
# after the server stops; if launch were non-blocking this would shut the app
# down immediately — confirm intended.
demo.close()