Spaces:
Running
Running
File size: 5,961 Bytes
8508174 3f13e9e 8508174 3f13e9e 8508174 e327671 8508174 e327671 8508174 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
import gradio as gr
import soundfile as sf
import numpy as np
from kittentts import KittenTTS
import os
# Load the TTS model once at import time so every request reuses this instance.
model = KittenTTS("KittenML/kitten-tts-nano-0.1")

# Voice identifiers offered in the UI: speakers 2 through 5, each with an
# "-m" variant followed by its "-f" variant.
AVAILABLE_VOICES = [
    f"expr-voice-{speaker}-{variant}"
    for speaker in range(2, 6)
    for variant in ("m", "f")
]
# Sample rate (Hz) reported alongside the generated samples; the page footer
# advertises 24kHz for this model.
SAMPLE_RATE_HZ = 24000


def generate_speech(text, voice, progress=gr.Progress()):
    """Synthesize speech for ``text`` with the selected KittenTTS voice.

    Args:
        text: Text to speak. ``None``, empty and whitespace-only input is
            rejected with a prompt instead of being sent to the model.
        voice: Voice identifier forwarded to ``model.generate``.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        An ``(audio, status)`` pair. ``audio`` is a ``(sample_rate, samples)``
        tuple, which is the form a ``gr.Audio`` output accepts for numpy data,
        or ``None`` when nothing could be generated. ``status`` is a
        human-readable message for the status box.
    """
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    # This is the UI boundary: any failure is reported in the status box
    # rather than raised, so the broad ``except`` below is deliberate.
    try:
        # The model is already loaded at import time, so the first reported
        # stage is the generation itself.
        progress(0.6, desc="Generating speech...")
        audio = model.generate(text, voice=voice)

        progress(0.9, desc="Processing audio...")
        audio = np.asarray(audio, dtype=np.float32)
        # Drop singleton axes (for example a leading batch axis) so a
        # (1, N) result is not averaged down to a single sample below.
        audio = np.atleast_1d(np.squeeze(audio))
        if audio.ndim > 1:
            # assumes a (samples, channels) layout — TODO confirm against
            # the model's output
            audio = audio.mean(axis=1)

        if audio.size == 0:
            return None, "❌ Error generating speech: the model returned no audio."

        # Peak-normalize; silent (all-zero) audio is left untouched to avoid
        # dividing by zero.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        progress(1.0, desc="Complete!")
        return (
            (SAMPLE_RATE_HZ, audio),
            f"✅ Successfully generated speech with voice: {voice}",
        )
    except Exception as e:
        return None, f"❌ Error generating speech: {e}"
def create_demo():
    """Build the Gradio Blocks interface for the KittenTTS demo.

    The page contains a text box, a voice dropdown and a generate button next
    to a panel listing the voices, followed by the audio player, a status
    box, example prompts and a footer. Both the button click and the text
    box submit event run ``generate_speech``.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here.
    """
    # Custom CSS for the page container and the classes used below.
    css = """
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2rem;
    }
    .voice-selector {
        margin: 1rem 0;
    }
    .output-audio {
        margin-top: 1rem;
    }
    """

    # Derive the voice groups from AVAILABLE_VOICES so the labels, counts and
    # default selection cannot drift from the actual list.
    male_voices = [name for name in AVAILABLE_VOICES if name.endswith("-m")]
    female_voices = [name for name in AVAILABLE_VOICES if name.endswith("-f")]
    # Default to the first female voice, falling back to the first voice.
    default_voice = female_voices[0] if female_voices else AVAILABLE_VOICES[0]

    with gr.Blocks(css=css, title="KittenTTS - High Quality Text-to-Speech") as demo:
        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>🎤 KittenTTS</h1>
            <p><em>High Quality Text-to-Speech Generation</em></p>
            <p>Generate natural-sounding speech from text using the KittenTTS model</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Text input
                text_input = gr.Textbox(
                    label="Enter your text",
                    placeholder="Type or paste your text here...",
                    lines=4,
                    max_lines=10,
                )
                # Voice selection; elem_classes attaches the .voice-selector rule.
                voice_dropdown = gr.Dropdown(
                    choices=AVAILABLE_VOICES,
                    value=default_voice,
                    label="Select Voice",
                    info=(
                        f"Choose from {len(AVAILABLE_VOICES)} different voices "
                        f"({len(male_voices)} male, {len(female_voices)} female)"
                    ),
                    elem_classes="voice-selector",
                )
                # Generate button
                generate_btn = gr.Button(
                    "🎵 Generate Speech",
                    variant="primary",
                    size="lg",
                )

            with gr.Column(scale=1):
                # Voice info panel, rendered from the same voice groups.
                gr.HTML(f"""
                <div style="background: #f0f0f0; padding: 1rem; border-radius: 8px;">
                    <h3>Available Voices:</h3>
                    <ul>
                        <li><strong>Male voices:</strong> {", ".join(male_voices)}</li>
                        <li><strong>Female voices:</strong> {", ".join(female_voices)}</li>
                    </ul>
                </div>
                """)

        # Output section
        with gr.Row():
            with gr.Column():
                # Audio output; elem_classes attaches the .output-audio rule.
                audio_output = gr.Audio(
                    label="Generated Audio",
                    type="numpy",
                    elem_classes="output-audio",
                )
                # Status message
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                )

        # Example texts; selecting one fills the text box and the dropdown.
        gr.Examples(
            examples=[
                ["Hello! This is a demonstration of the KittenTTS model.", "expr-voice-2-f"],
                ["The quick brown fox jumps over the lazy dog.", "expr-voice-2-m"],
                ["Welcome to our high-quality text-to-speech system.", "expr-voice-3-f"],
                ["This model works without requiring a GPU.", "expr-voice-3-m"],
            ],
            inputs=[text_input, voice_dropdown],
        )

        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; background: #f9f9f9; border-radius: 8px;">
            <p><strong>KittenTTS</strong> - Powered by <a href="https://huggingface.co/KittenML/kitten-tts-nano-0.1" target="_blank">KittenML/kitten-tts-nano-0.1</a></p>
            <p>Model: KittenTTS Nano v0.1 | Sample Rate: 24kHz</p>
        </div>
        """)

        # Run the same handler for the button click and for the text box
        # submit event, so the two triggers cannot fall out of sync.
        for trigger in (generate_btn.click, text_input.submit):
            trigger(
                fn=generate_speech,
                inputs=[text_input, voice_dropdown],
                outputs=[audio_output, status_output],
            )

    return demo
# Script entry point: build the interface and start the Gradio server.
if __name__ == "__main__":
    launch_settings = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "debug": False,
    }
    demo = create_demo()
    demo.launch(**launch_settings)