texttoson / app.py
KIMOSSINO's picture
Create app.py
41a5749 verified
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModel
import numpy as np
# Initialize models and processors
# Loaded once at import time; text_to_speech() reuses these module-level
# objects on every request instead of reloading the checkpoint.
# NOTE(review): MMS-TTS checkpoints are usually published per language
# (e.g. "facebook/mms-tts-eng") -- confirm that this id resolves with
# AutoProcessor / AutoModel and that one checkpoint serves every language
# offered in the UI.
model_id = "facebook/mms-tts"
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)
# UI label -> language code handed to the processor in text_to_speech().
# The keys double as the choices of the language dropdown.
LANGUAGES = dict(
    English="eng",
    French="fra",
    Spanish="spa",
)

# UI label -> speaker index handed to the model in text_to_speech().
# The keys double as the choices of the speaker radio group.
SPEAKERS = dict(
    Male=0,
    Female=1,
)
def text_to_speech(text, language, speaker_gender, speed):
    """Synthesize speech for ``text`` and return it in Gradio's numpy form.

    Args:
        text: Text to speak. Empty or whitespace-only text yields no audio.
        language: A key of ``LANGUAGES`` (for example ``"English"``).
        speaker_gender: A key of ``SPEAKERS`` (``"Male"`` or ``"Female"``).
        speed: Speech-speed multiplier coming from the UI slider.

    Returns:
        A ``(sampling_rate, waveform)`` tuple on success, or ``None`` when
        there is nothing to synthesize or generation fails. Failures are
        logged with their traceback rather than being discarded.
    """
    # Function-scope import: logging is stdlib and only needed on failure.
    import logging

    # Nothing to synthesize -- skip the model call entirely.
    if not text or not str(text).strip():
        return None

    try:
        # Prepare inputs for the selected language.
        inputs = processor(
            text=text,
            language=LANGUAGES[language],
            return_tensors="pt",
        )

        # Inference only, so gradient tracking is switched off.
        # NOTE(review): assumes the loaded model's generate() accepts
        # `speaker_id` and `speed_ratios` -- confirm against the checkpoint.
        with torch.no_grad():
            output = model.generate(
                **inputs,
                speaker_id=torch.tensor([SPEAKERS[speaker_gender]]),
                # float() guards against the slider delivering an integer
                # (e.g. 1), which would otherwise build an int64 tensor.
                speed_ratios=torch.tensor([float(speed)]),
            )

        # Take element 0 of the output and convert it to a CPU numpy array.
        speech = output[0].cpu().numpy()
        sampling_rate = model.config.sampling_rate
        return (sampling_rate, speech)
    except Exception:
        # Best effort for the UI (it simply shows no audio), but record the
        # cause instead of swallowing it silently.
        logging.getLogger(__name__).exception(
            "Text-to-speech generation failed (language=%r, speaker=%r, speed=%r)",
            language,
            speaker_gender,
            speed,
        )
        return None
# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for the text-to-speech demo.

    The page consists of a heading, a row with two columns, and a feature
    list. The first column holds the inputs (text box, language dropdown,
    speaker radio group, speed slider) and the submit button; the second
    column holds the audio output. Clicking the button calls
    ``text_to_speech`` with the four input values and routes its result to
    the audio component.

    Returns:
        The constructed ``gr.Blocks`` app. It is not launched here.
    """
    theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="gray",
    )

    with gr.Blocks(theme=theme) as demo:
        # The studio-microphone emoji (U+1F399 U+FE0F) is written with
        # escapes so the heading survives re-encoding of this source file;
        # it was previously stored as mojibake.
        gr.Markdown(
            "\n"
            "# \U0001F399\uFE0F Multilingual Text-to-Speech\n"
            "Convert text to natural-sounding speech in multiple languages.\n"
        )

        with gr.Row():
            # Input controls.
            with gr.Column():
                text_input = gr.Textbox(
                    label="Enter Text",
                    placeholder="Type your text here...",
                    lines=5,
                )
                language = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="English",
                    label="Language",
                )
                speaker = gr.Radio(
                    choices=list(SPEAKERS.keys()),
                    value="Male",
                    label="Speaker Gender",
                )
                speed = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speech Speed",
                )
                submit_btn = gr.Button("Generate Speech", variant="primary")

            # Output player; receives the (sampling_rate, waveform) tuple.
            with gr.Column():
                audio_output = gr.Audio(
                    label="Generated Speech",
                    type="numpy",
                )

        # Input order must match text_to_speech's positional parameters.
        submit_btn.click(
            fn=text_to_speech,
            inputs=[text_input, language, speaker, speed],
            outputs=audio_output,
        )

        gr.Markdown(
            "\n"
            "### Features:\n"
            "- Support for English, French, and Spanish\n"
            "- Male and Female voice options\n"
            "- Adjustable speech speed\n"
            "- High-quality, natural-sounding voices\n"
        )

    return demo
# Build the app at import time so `demo` stays available as a module-level
# name for anything that imports this file.
demo = create_interface()

# Start the web server only when this file is executed as a script, so that
# merely importing the module does not launch it.
if __name__ == "__main__":
    demo.launch()