|
import functools
import logging

import gradio as gr
import numpy as np
import torch
from transformers import AutoModel, AutoProcessor, AutoTokenizer
|
|
|
|
|
logger = logging.getLogger(__name__)

# Base name of the MMS-TTS checkpoints. The Transformers-format weights are
# published one repository per language as "<model_id>-<ISO 639-3 code>"
# (for example "facebook/mms-tts-eng"), so this value is used as a prefix.
model_id = "facebook/mms-tts"

# UI label -> ISO 639-3 code used as the checkpoint suffix.
LANGUAGES = {
    "English": "eng",
    "French": "fra",
    "Spanish": "spa",
}

# UI label -> speaker index passed to the model as `speaker_id`.
# NOTE(review): the public MMS-TTS checkpoints appear to be single-speaker,
# in which case this choice cannot change the voice -- confirm, and consider
# removing the option from the UI if so.
SPEAKERS = {
    "Male": 0,
    "Female": 1,
}


@functools.lru_cache(maxsize=None)
def _load_tts(language_code):
    """Load (once) and return the ``(tokenizer, model)`` pair for a language.

    Results are cached per language code, so each checkpoint is downloaded
    and instantiated at most once per process.
    """
    checkpoint = f"{model_id}-{language_code}"
    tts_processor = AutoTokenizer.from_pretrained(checkpoint)
    tts_model = AutoModel.from_pretrained(checkpoint)
    tts_model.eval()
    return tts_processor, tts_model


# Warm the cache with the UI's default language and keep the historical
# module-level names `processor` and `model` available.
processor, model = _load_tts(LANGUAGES["English"])


def text_to_speech(text, language, speaker_gender, speed):
    """Synthesize ``text`` and return audio for a ``gr.Audio(type="numpy")``.

    Args:
        text: The text to speak. Must contain non-whitespace characters.
        language: A key of ``LANGUAGES`` (e.g. ``"English"``).
        speaker_gender: A key of ``SPEAKERS``. Only forwarded to the model
            when its config reports more than one speaker.
        speed: Speaking-rate multiplier; must be greater than zero.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, where ``waveform`` is a NumPy
        array taken from the first item of the model's output batch.

    Raises:
        gr.Error: On invalid input or when synthesis fails, so the problem
            is shown in the UI instead of silently producing no audio.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if language not in LANGUAGES:
        raise gr.Error(f"Unsupported language: {language!r}")
    if speaker_gender not in SPEAKERS:
        raise gr.Error(f"Unsupported speaker: {speaker_gender!r}")
    if speed is None or speed <= 0:
        raise gr.Error("Speech speed must be greater than zero.")

    try:
        tts_processor, tts_model = _load_tts(LANGUAGES[language])
        inputs = tts_processor(text=text, return_tensors="pt")

        # `speaker_id` is only meaningful for multi-speaker checkpoints.
        forward_kwargs = {}
        if getattr(tts_model.config, "num_speakers", 1) > 1:
            forward_kwargs["speaker_id"] = SPEAKERS[speaker_gender]

        # The speaking rate is an attribute of the (shared, cached) model
        # rather than a call argument; concurrent calls using different
        # speeds would therefore race on this attribute.
        tts_model.speaking_rate = float(speed)

        with torch.no_grad():
            waveform = tts_model(**inputs, **forward_kwargs).waveform
    except Exception as exc:
        # Boundary handler: log the traceback and surface the failure to the
        # user instead of swallowing it.
        logger.exception("Speech synthesis failed")
        raise gr.Error(f"Speech generation failed: {exc}") from exc

    speech = waveform[0].cpu().numpy()
    return (tts_model.config.sampling_rate, speech)
|
|
|
|
|
def create_interface():
    """Build and return the Gradio Blocks UI for the text-to-speech demo.

    The layout is a two-column row: the left column holds the text box,
    language dropdown, speaker radio, speed slider and submit button; the
    right column holds the audio player. Clicking the button calls
    ``text_to_speech`` with the four input values and sends its result to
    the audio player.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="gray",
    )

    with gr.Blocks(theme=theme) as demo:
        # The heading emoji was mis-encoded in the original source; restored.
        gr.Markdown(
            """
            # 🎙️ Multilingual Text-to-Speech
            Convert text to natural-sounding speech in multiple languages.
            """
        )

        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Enter Text",
                    placeholder="Type your text here...",
                    lines=5,
                )
                language = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="English",
                    label="Language",
                )
                speaker = gr.Radio(
                    choices=list(SPEAKERS.keys()),
                    value="Male",
                    label="Speaker Gender",
                )
                speed = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speech Speed",
                )
                submit_btn = gr.Button("Generate Speech", variant="primary")

            with gr.Column():
                audio_output = gr.Audio(
                    label="Generated Speech",
                    type="numpy",
                )

        # Input order must match the parameter order of text_to_speech.
        submit_btn.click(
            fn=text_to_speech,
            inputs=[text_input, language, speaker, speed],
            outputs=audio_output,
        )

        gr.Markdown(
            """
            ### Features:
            - Support for English, French, and Spanish
            - Male and Female voice options
            - Adjustable speech speed
            - High-quality, natural-sounding voices
            """
        )

    return demo
|
|
|
# Build the app object at module level so importers can reuse `demo`; the
# web server itself is only started when this file is executed as a script.
demo = create_interface()

if __name__ == "__main__":
    demo.launch()
|
|