File size: 3,081 Bytes
41a5749
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModel
import numpy as np

# Checkpoint identifier shared by the processor and the model.
model_id = "facebook/mms-tts"

# Load the processor first, then the model, once at import time so that
# every call to the synthesis function reuses the same objects.
processor, model = (
    loader.from_pretrained(model_id) for loader in (AutoProcessor, AutoModel)
)

# Dropdown label -> three-letter language code handed to the processor.
LANGUAGES = dict(English="eng", French="fra", Spanish="spa")

# Radio-button label -> integer speaker id handed to the model.
SPEAKERS = dict(Male=0, Female=1)

def text_to_speech(text, language, speaker_gender, speed):
    """Synthesize speech for ``text`` and return it as ``(rate, waveform)``.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only input
            is rejected up front without invoking the model.
        language: A key of ``LANGUAGES`` (the label chosen in the UI).
        speaker_gender: A key of ``SPEAKERS`` (the label chosen in the UI).
        speed: Speed ratio, forwarded to the model as ``speed_ratios``.

    Returns:
        A ``(sampling_rate, waveform)`` tuple where ``waveform`` is a NumPy
        array, or ``None`` when there is nothing to synthesize or when
        synthesis fails. Failures are logged with their traceback rather
        than raised, so the caller only ever sees a tuple or ``None``.
    """
    import logging  # local import keeps this function self-contained

    # Nothing to say: skip the (expensive) model call entirely.
    if text is None or (isinstance(text, str) and not text.strip()):
        return None

    try:
        # Prepare inputs
        inputs = processor(
            text=text,
            language=LANGUAGES[language],
            return_tensors="pt",
        )

        # Generate speech; no gradients are needed for inference.
        # NOTE(review): confirm the loaded model really exposes generate()
        # accepting speaker_id / speed_ratios -- if it does not, the error
        # is logged below and the UI simply shows no audio.
        with torch.no_grad():
            output = model.generate(
                **inputs,
                speaker_id=torch.tensor([SPEAKERS[speaker_gender]]),
                speed_ratios=torch.tensor([speed]),
            )

        # Convert the first generated item to a CPU NumPy waveform.
        speech = output[0].cpu().numpy()
        sampling_rate = model.config.sampling_rate

        return (sampling_rate, speech)
    except Exception:
        # Best-effort boundary for the UI callback: keep returning None,
        # but record why instead of silently discarding the error.
        logging.getLogger(__name__).exception(
            "Speech synthesis failed (language=%r, speaker=%r, speed=%r)",
            language,
            speaker_gender,
            speed,
        )
        return None

# Create Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI for the text-to-speech demo.

    The layout is a two-column row: the left column holds the text box,
    language dropdown, speaker radio, speed slider and the submit button;
    the right column holds the audio player. Clicking the button calls
    ``text_to_speech`` with the four input values and routes its result to
    the audio component.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="gray",
    )) as demo:
        # Page heading. The emoji was previously stored as mis-decoded
        # bytes and rendered as garbage characters.
        gr.Markdown(
            """
            # 🎙️ Multilingual Text-to-Speech
            Convert text to natural-sounding speech in multiple languages.
            """
        )

        with gr.Row():
            # Left column: all user inputs.
            with gr.Column():
                text_input = gr.Textbox(
                    label="Enter Text",
                    placeholder="Type your text here...",
                    lines=5
                )
                language = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="English",
                    label="Language"
                )
                speaker = gr.Radio(
                    choices=list(SPEAKERS.keys()),
                    value="Male",
                    label="Speaker Gender"
                )
                speed = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speech Speed"
                )
                submit_btn = gr.Button("Generate Speech", variant="primary")

            # Right column: the synthesized audio.
            with gr.Column():
                audio_output = gr.Audio(
                    label="Generated Speech",
                    type="numpy"
                )

        # Input order must match text_to_speech's positional parameters.
        submit_btn.click(
            fn=text_to_speech,
            inputs=[text_input, language, speaker, speed],
            outputs=audio_output
        )

        gr.Markdown(
            """
            ### Features:
            - Support for English, French, and Spanish
            - Male and Female voice options
            - Adjustable speech speed
            - High-quality, natural-sounding voices
            """
        )

    return demo

# Build the UI at import time and start serving it immediately; `demo` stays
# a module-level name so it remains reachable from outside this file.
demo = create_interface()
demo.launch()