🎤 KittenTTS
High Quality Text-to-Speech Generation
Generate natural-sounding speech from text using the KittenTTS model
import gradio as gr import soundfile as sf import numpy as np from kittentts import KittenTTS import os # Initialize the model model = KittenTTS("KittenML/kitten-tts-nano-0.1") # Available voices AVAILABLE_VOICES = [ 'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f', 'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f' ] def generate_speech(text, voice, progress=gr.Progress()): """ Generate speech from text using KittenTTS """ if not text.strip(): return None, "Please enter some text to generate speech." try: progress(0.3, desc="Loading model...") # Generate audio progress(0.6, desc="Generating speech...") audio = model.generate(text, voice=voice) progress(0.9, desc="Processing audio...") # Convert to the format expected by Gradio # Ensure audio is in the correct format (float32, mono) if len(audio.shape) > 1: audio = audio.mean(axis=1) # Convert stereo to mono if needed # Normalize audio audio = audio / np.max(np.abs(audio)) if np.max(np.abs(audio)) > 0 else audio progress(1.0, desc="Complete!") return audio, f"✅ Successfully generated speech with voice: {voice}" except Exception as e: return None, f"❌ Error generating speech: {str(e)}" def create_demo(): """ Create the Gradio demo interface """ # Custom CSS for better styling css = """ .gradio-container { max-width: 800px !important; margin: auto !important; } .main-header { text-align: center; margin-bottom: 2rem; } .voice-selector { margin: 1rem 0; } .output-audio { margin-top: 1rem; } """ with gr.Blocks(css=css, title="KittenTTS - High Quality Text-to-Speech") as demo: # Header gr.HTML("""
High Quality Text-to-Speech Generation
Generate natural-sounding speech from text using the KittenTTS model
KittenTTS - Powered by KittenML/kitten-tts-nano-0.1
Model: KittenTTS Nano v0.1 | Sample Rate: 24kHz