|
import torch |
|
import scipy |
|
import gradio as gr |
|
|
|
from transformers import set_seed, pipeline |
|
from transformers import VitsTokenizer, VitsModel |
|
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM |
|
from datasets import load_dataset, Audio |
|
|
|
import speech_to_text, text_to_speech, translation |
|
|
|
# Language codes offered by the "Language" dropdowns of the speech tabs
# (presumably ISO 639-3 codes for Mooré, French and English — confirm
# against the speech_to_text / text_to_speech modules).
language_list = ["mos", "fra", "eng"]

# Root Blocks container: the tabbed UI is attached to it further down in
# this script, and it is the object that gets launched.
demo = gr.Blocks()
|
|
|
# Speech-to-text tab: an audio clip (microphone recording or upload,
# requested from Gradio as a file path) and a language choice are passed
# to speech_to_text.transcribe; whatever it returns is rendered as text.
_stt_inputs = [
    gr.Audio(sources=["microphone", "upload"], type="filepath"),
    gr.Dropdown(language_list, label="Language"),
]

mms_stt = gr.Interface(
    fn=speech_to_text.transcribe,
    inputs=_stt_inputs,
    outputs="text",
    title="Speech-to-text",
)
|
|
|
# Text-to-speech tab: free text and a language choice are passed to
# text_to_speech.synthesize_facebook; its result feeds a numpy-typed
# audio player labelled "Generated Audio".
_tts_inputs = [
    gr.Text(label="Input text"),
    gr.Dropdown(language_list, label="Language"),
]
_tts_outputs = [
    gr.Audio(label="Generated Audio", type="numpy"),
]

mms_tts = gr.Interface(
    fn=text_to_speech.synthesize_facebook,
    inputs=_tts_inputs,
    outputs=_tts_outputs,
    title="Text-to-speech",
)
|
|
|
# Translation tab: text plus a source and a target language code are
# passed to translation.translation; the result is rendered as text.
# The same three codes are offered for both source and target
# (NOTE(review): they look like NLLB/FLORES-style "<lang>_<script>" tags —
# confirm against the translation module).
_translation_codes = ["eng_Latn", "fra_Latn", "mos_Latn"]

_translate_inputs = [
    gr.Textbox(label="Text", placeholder="Yaa sõama"),
    # Each dropdown receives its own copy of the code list.
    gr.Dropdown(label="Source Language", choices=list(_translation_codes)),
    gr.Dropdown(label="Target Language", choices=list(_translation_codes)),
]

# One pre-filled example row: text, source code, target code.
_translate_examples = [
    ["Building a translation demo with Gradio is so easy!", "eng_Latn", "mos_Latn"],
]

mms_translate = gr.Interface(
    fn=translation.translation,
    inputs=_translate_inputs,
    outputs=["text"],
    examples=_translate_examples,
    title="Translation Demo",
)
|
|
|
# Tab title -> interface, in the order the tabs are displayed.
_tabs = [
    ("Translation", mms_translate),
    ("Text-to-speech", mms_tts),
    ("Speech-to-text", mms_stt),
]

# Mount the three interfaces as tabs inside the root Blocks container.
with demo:
    gr.TabbedInterface(
        [interface for _, interface in _tabs],
        [tab_title for tab_title, _ in _tabs],
    )

# Start the Gradio app with default launch settings.
demo.launch()