# Spaces: Running  (page-header residue from the hosting site; not part of the program)
import gradio as gr | |
import soundfile as sf | |
import numpy as np | |
from kittentts import KittenTTS | |
import os | |
# Load the TTS model once at import time; generate_speech() reuses this instance.
model = KittenTTS("KittenML/kitten-tts-nano-0.1")

# Voice identifiers offered in the UI: voice numbers 2-5, each with a
# male ("m") and a female ("f") variant, ordered m-before-f per number.
AVAILABLE_VOICES = [
    f"expr-voice-{number}-{gender}"
    for number in (2, 3, 4, 5)
    for gender in ("m", "f")
]
# Output sample rate in Hz (the page footer advertises 24kHz for this model).
SAMPLE_RATE = 24000


def generate_speech(text, voice, progress=gr.Progress()):
    """
    Generate speech from text using KittenTTS.

    Args:
        text: Text to synthesize. ``None`` or whitespace-only input is
            rejected with a status message and never reaches the model.
        voice: Voice identifier forwarded to ``model.generate``.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        An ``(audio, status)`` pair matching the two Gradio outputs.
        ``audio`` is a ``(sample_rate, samples)`` tuple on success, which is
        the form a ``gr.Audio(type="numpy")`` output expects, and ``None``
        on failure. ``status`` is a human-readable message.
    """
    # A cleared textbox may deliver None rather than "".
    if text is None or not text.strip():
        return None, "Please enter some text to generate speech."

    try:
        progress(0.3, desc="Loading model...")

        # Generate audio
        progress(0.6, desc="Generating speech...")
        audio = np.asarray(model.generate(text, voice=voice), dtype=np.float32)

        progress(0.9, desc="Processing audio...")
        if audio.size == 0:
            # Fail with a readable message instead of numpy's reduction error.
            raise ValueError("model returned no audio samples")

        # Down-mix to mono if needed.
        # NOTE(review): assumes multi-channel output is (samples, channels) - confirm.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # Peak-normalize; skip all-zero audio to avoid dividing by zero.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        progress(1.0, desc="Complete!")
        return (SAMPLE_RATE, audio), f"✅ Successfully generated speech with voice: {voice}"
    except Exception as e:
        # UI boundary: report the failure in the status box rather than crash.
        return None, f"❌ Error generating speech: {str(e)}"
def create_demo():
    """
    Create the Gradio demo interface.

    Builds a Blocks layout with a text box, a voice dropdown, a generate
    button, an audio player and a status box, then wires both the button
    click and the textbox submit event to ``generate_speech``.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    # Custom CSS for better styling
    css = """
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2rem;
    }
    .voice-selector {
        margin: 1rem 0;
    }
    .output-audio {
        margin-top: 1rem;
    }
    """

    # Derive the voice groups from AVAILABLE_VOICES so the dropdown, its
    # hint text and the info panel can never disagree with each other.
    male_voices = [v for v in AVAILABLE_VOICES if v.endswith("-m")]
    female_voices = [v for v in AVAILABLE_VOICES if v.endswith("-f")]
    male_list = ", ".join(male_voices)
    female_list = ", ".join(female_voices)
    # Default to the first female voice; fall back to any voice if none exists.
    default_voice = next(iter(female_voices or AVAILABLE_VOICES), None)

    with gr.Blocks(css=css, title="KittenTTS - High Quality Text-to-Speech") as demo:
        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>🎤 KittenTTS</h1>
            <p><em>High Quality Text-to-Speech Generation</em></p>
            <p>Generate natural-sounding speech from text using the KittenTTS model</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Text input
                text_input = gr.Textbox(
                    label="Enter your text",
                    placeholder="Type or paste your text here...",
                    lines=4,
                    max_lines=10,
                )
                # Voice selection
                voice_dropdown = gr.Dropdown(
                    choices=AVAILABLE_VOICES,
                    value=default_voice,
                    label="Select Voice",
                    info=(
                        f"Choose from {len(AVAILABLE_VOICES)} different voices "
                        f"({len(male_voices)} male, {len(female_voices)} female)"
                    ),
                    # Attach the .voice-selector rule declared in the CSS above.
                    elem_classes="voice-selector",
                )
                # Generate button
                generate_btn = gr.Button(
                    "🎵 Generate Speech",
                    variant="primary",
                    size="lg",
                )

            with gr.Column(scale=1):
                # Voice info
                gr.HTML(f"""
                <div style="background: #f0f0f0; padding: 1rem; border-radius: 8px;">
                    <h3>Available Voices:</h3>
                    <ul>
                        <li><strong>Male voices:</strong> {male_list}</li>
                        <li><strong>Female voices:</strong> {female_list}</li>
                    </ul>
                </div>
                """)

        # Output section
        with gr.Row():
            with gr.Column():
                # Audio output
                audio_output = gr.Audio(
                    label="Generated Audio",
                    type="numpy",
                    # Attach the .output-audio rule declared in the CSS above.
                    elem_classes="output-audio",
                )
                # Status message
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                )

        # Example texts
        gr.Examples(
            examples=[
                ["Hello! This is a demonstration of the KittenTTS model.", "expr-voice-2-f"],
                ["The quick brown fox jumps over the lazy dog.", "expr-voice-2-m"],
                ["Welcome to our high-quality text-to-speech system.", "expr-voice-3-f"],
                ["This model works without requiring a GPU.", "expr-voice-3-m"],
            ],
            inputs=[text_input, voice_dropdown],
        )

        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; background: #f9f9f9; border-radius: 8px;">
            <p><strong>KittenTTS</strong> - Powered by <a href="https://huggingface.co/KittenML/kitten-tts-nano-0.1" target="_blank">KittenML/kitten-tts-nano-0.1</a></p>
            <p>Model: KittenTTS Nano v0.1 | Sample Rate: 24kHz</p>
        </div>
        """)

        # Clicking the button and pressing Enter in the textbox both run the
        # same handler with the same inputs and outputs.
        for trigger in (generate_btn.click, text_input.submit):
            trigger(
                fn=generate_speech,
                inputs=[text_input, voice_dropdown],
                outputs=[audio_output, status_output],
            )

    return demo
# Script entry point: build the interface and start the web server.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": True,
        "debug": False,
    }
    demo = create_demo()
    demo.launch(**launch_options)