# KittenTTSDemo / app.py
# Vishwas1's picture
# Upload 5 files
# e327671 verified
import gradio as gr
import soundfile as sf
import numpy as np
from kittentts import KittenTTS
import os
# Initialize the model
# Built once at import time; generate_speech() calls model.generate() on this
# shared instance for every request instead of constructing a new one.
model = KittenTTS("KittenML/kitten-tts-nano-0.1")
# Available voices: ids have the form "expr-voice-<n>-<m|f>" for n = 2..5,
# with the "-m" id listed before the "-f" id for each n.
AVAILABLE_VOICES = [
    f"expr-voice-{number}-{suffix}"
    for number in range(2, 6)
    for suffix in ("m", "f")
]
def generate_speech(text, voice, progress=gr.Progress()):
    """
    Generate speech from text using KittenTTS.

    Args:
        text: Text to synthesize. ``None`` or whitespace-only text is rejected
            with a prompt message instead of calling the model.
        voice: Voice identifier forwarded to ``model.generate``.
        progress: Gradio progress tracker used to report stage updates.

    Returns:
        A ``(audio, status)`` pair matching the two UI outputs. On success
        ``audio`` is a ``(sample_rate, samples)`` tuple and ``status`` is a
        success message; on empty input or failure ``audio`` is ``None`` and
        ``status`` explains why.
    """
    # A numpy-typed gr.Audio output needs the sample rate alongside the
    # samples. 24 kHz is the rate this app advertises for the model in its
    # page footer -- NOTE(review): confirm against the model's documentation.
    sample_rate = 24000

    if text is None or not text.strip():
        return None, "Please enter some text to generate speech."

    # Broad catch is deliberate: this is the UI boundary, and any failure is
    # reported in the status box rather than crashing the request.
    try:
        # The model is already loaded at import time, so the first reported
        # stage is generation itself.
        progress(0.3, desc="Generating speech...")
        audio = np.asarray(model.generate(text, voice=voice), dtype=np.float32)

        progress(0.9, desc="Processing audio...")
        # Collapse multi-channel output to mono.
        # assumes a 2-D result is laid out as (samples, channels) -- TODO confirm
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        if audio.size == 0:
            raise ValueError("model returned no audio")

        # Peak-normalize to [-1, 1]; all-zero (silent) audio is left untouched
        # to avoid dividing by zero.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        progress(1.0, desc="Complete!")
        return (sample_rate, audio), f"✅ Successfully generated speech with voice: {voice}"
    except Exception as e:
        return None, f"❌ Error generating speech: {e}"
def create_demo():
    """
    Create the Gradio demo interface.

    Builds a Blocks layout with a text box, a voice dropdown, a generate
    button, an audio player and a status box, then wires both the button click
    and the textbox submit event to ``generate_speech``.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    # Custom CSS for better styling
    css = """
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2rem;
    }
    .voice-selector {
        margin: 1rem 0;
    }
    .output-audio {
        margin-top: 1rem;
    }
    """

    # Derive the male/female groupings from AVAILABLE_VOICES so the dropdown
    # hint and the info panel cannot drift from the actual list of choices.
    male_voices = [v for v in AVAILABLE_VOICES if v.endswith("-m")]
    female_voices = [v for v in AVAILABLE_VOICES if v.endswith("-f")]
    # Default to a female voice when one exists, else fall back to the first id.
    default_voice = female_voices[0] if female_voices else AVAILABLE_VOICES[0]

    with gr.Blocks(css=css, title="KittenTTS - High Quality Text-to-Speech") as demo:
        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>🎤 KittenTTS</h1>
            <p><em>High Quality Text-to-Speech Generation</em></p>
            <p>Generate natural-sounding speech from text using the KittenTTS model</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Text input
                text_input = gr.Textbox(
                    label="Enter your text",
                    placeholder="Type or paste your text here...",
                    lines=4,
                    max_lines=10,
                )
                # Voice selection; elem_classes attaches the otherwise unused
                # .voice-selector rule from the CSS above.
                voice_dropdown = gr.Dropdown(
                    choices=AVAILABLE_VOICES,
                    value=default_voice,
                    label="Select Voice",
                    info=(
                        f"Choose from {len(AVAILABLE_VOICES)} different voices "
                        f"({len(male_voices)} male, {len(female_voices)} female)"
                    ),
                    elem_classes=["voice-selector"],
                )
                # Generate button
                generate_btn = gr.Button(
                    "🎵 Generate Speech",
                    variant="primary",
                    size="lg",
                )

            with gr.Column(scale=1):
                # Voice info
                gr.HTML(f"""
                <div style="background: #f0f0f0; padding: 1rem; border-radius: 8px;">
                    <h3>Available Voices:</h3>
                    <ul>
                        <li><strong>Male voices:</strong> {", ".join(male_voices)}</li>
                        <li><strong>Female voices:</strong> {", ".join(female_voices)}</li>
                    </ul>
                </div>
                """)

        # Output section
        with gr.Row():
            with gr.Column():
                # Audio output; elem_classes attaches the otherwise unused
                # .output-audio rule from the CSS above.
                audio_output = gr.Audio(
                    label="Generated Audio",
                    type="numpy",
                    elem_classes=["output-audio"],
                )
                # Status message
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                )

        # Example texts (clicking one only fills in the two inputs)
        gr.Examples(
            examples=[
                ["Hello! This is a demonstration of the KittenTTS model.", "expr-voice-2-f"],
                ["The quick brown fox jumps over the lazy dog.", "expr-voice-2-m"],
                ["Welcome to our high-quality text-to-speech system.", "expr-voice-3-f"],
                ["This model works without requiring a GPU.", "expr-voice-3-m"],
            ],
            inputs=[text_input, voice_dropdown],
        )

        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; background: #f9f9f9; border-radius: 8px;">
            <p><strong>KittenTTS</strong> - Powered by <a href="https://huggingface.co/KittenML/kitten-tts-nano-0.1" target="_blank">KittenML/kitten-tts-nano-0.1</a></p>
            <p>Model: KittenTTS Nano v0.1 | Sample Rate: 24kHz</p>
        </div>
        """)

        # Connect the generate button
        generate_btn.click(
            fn=generate_speech,
            inputs=[text_input, voice_dropdown],
            outputs=[audio_output, status_output],
        )
        # Also generate when the textbox's submit event fires
        text_input.submit(
            fn=generate_speech,
            inputs=[text_input, voice_dropdown],
            outputs=[audio_output, status_output],
        )

    return demo
# Script entry point: build the interface and start serving it.
if __name__ == "__main__":
    # Launch settings gathered in one place; values are passed to launch() as
    # keyword arguments.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "debug": False,
    }
    demo = create_demo()
    demo.launch(**launch_options)