# AudioTranslator / app.py  (ChandJain — "Create app.py", cbc2faa)
# Gradio app: English speech -> English text (Whisper) -> Hindi text (NLLB)
# -> Hindi speech (Bark).
from transformers import pipeline
import os
import gradio as gr
import torch
from IPython.display import Audio as IPythonAudio
from gtts import gTTS
import IPython.display as ipd
# --- Model pipelines, loaded once at module import time ---

# Speech -> English text: distilled Whisper (English-only) ASR model.
asr = pipeline(task="automatic-speech-recognition",
model="distil-whisper/distil-small.en")
# English text -> Hindi text: NLLB-200 translation model.
# bfloat16 halves memory vs. float32 — NOTE(review): assumes the runtime
# hardware supports bfloat16; confirm on the deployment target.
translator = pipeline(task="translation",
model="facebook/nllb-200-distilled-600M",
torch_dtype=torch.bfloat16)
# Hindi text -> speech: Bark TTS. Returns a dict with "audio" and
# "sampling_rate" keys when called.
pipe = pipeline("text-to-speech", model="suno/bark-small",
torch_dtype=torch.bfloat16)
# Top-level Blocks container that will host the tabbed interfaces below.
demo = gr.Blocks()
def transcribe_speech(filepath):
    """Transcribe English speech, translate it to Hindi, and synthesize Hindi audio.

    Parameters
    ----------
    filepath : str | None
        Path to the recorded/uploaded audio file (gr.Audio with
        type="filepath"); None when the user submitted nothing.

    Returns
    -------
    tuple[int, numpy.ndarray] | None
        (sampling_rate, waveform) pair consumable by a gr.Audio output
        component, or None when no input audio was provided.
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        # An "audio" output component cannot render ""; None clears it.
        return None
    # ASR -> English text, then translate English -> Hindi (Devanagari).
    output = translator(asr(filepath)["text"],
                        src_lang="eng_Latn",
                        tgt_lang="hin_Deva")
    # Bark returns {"audio": ndarray, "sampling_rate": int}; gr.Audio
    # expects (sr, 1-D array), so unpack and squeeze the channel axis.
    speech = pipe(output[0]['translation_text'])
    return (speech["sampling_rate"], speech["audio"].squeeze())
# Tab 1: record speech from the microphone; play back the Hindi narration.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="audio",
    allow_flagging="never",
)
# Tab 2: same pipeline, but fed from an uploaded audio file.
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs="audio",
    allow_flagging="never",
)
# Mount both input modes as tabs inside the Blocks container, then serve.
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch(share=True)  # share=True exposes a public Gradio link
demo.close()