# speech_to_text / app.py
# Aashiue's picture
# Update app.py
# 99e2cd1
# raw
# history blame contribute delete
# No virus
# 911 Bytes
import gradio as gr
from transformers import pipeline
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
# Checkpoint name shared by the translation model and its tokenizer.
_MBART_CHECKPOINT = "facebook/mbart-large-50-one-to-many-mmt"

# Speech recogniser. No model argument is passed here, so whichever model the
# library selects for this task is the one that gets loaded.
transcribe = pipeline(task="automatic-speech-recognition")

# Tokenizer is configured with English ("en_XX") as the source language.
tokenizer = MBart50TokenizerFast.from_pretrained(
    _MBART_CHECKPOINT,
    src_lang="en_XX",
)
model = MBartForConditionalGeneration.from_pretrained(_MBART_CHECKPOINT)
def speech_to_text(audio):
    """Transcribe recorded speech and translate the transcript into Hindi.

    Args:
        audio: Path to the recorded audio file as handed over by the UI
            component, or a falsy value (``None`` / ``""``) when nothing
            was recorded.

    Returns:
        The translated text as a single string. An empty string is returned
        when there is no audio or the transcript is blank.
    """
    # The UI submits None when the user presses submit without recording;
    # return early instead of letting the recogniser raise on it.
    if not audio:
        return ""

    text = transcribe(audio)["text"]
    # Nothing was recognised: skip generation rather than translate a blank
    # prompt.
    if not text.strip():
        return ""

    model_inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    generated_tokens = model.generate(
        **model_inputs,
        # Force the first generated token to the Hindi language code so the
        # decoder produces Hindi output.
        forced_bos_token_id=tokenizer.lang_code_to_id["hi_IN"],
    )
    # batch_decode yields one string per generated sequence. A single text was
    # submitted, so unwrap it: the "text" output expects a string, and handing
    # it the list would display the list itself rather than the sentence.
    translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return translation[0]
# Web UI: audio recorded from the microphone is passed to speech_to_text as a
# file path, and the function's result is shown in a text output.
microphone_input = gr.Audio(source="microphone", type="filepath")
demo = gr.Interface(
    fn=speech_to_text,
    inputs=microphone_input,
    outputs="text",
)
demo.launch()