# KittenTTSDemo / app_minimal.py
# Uploaded by Vishwas1
# Commit message: Upload app_minimal.py
# Commit: acd6515 (verified)
import gradio as gr
import numpy as np
from kittentts import KittenTTS
# Build the shared KittenTTS instance once at import time; generate_speech
# calls model.generate() on this object for every request.
_MODEL_ID = "KittenML/kitten-tts-nano-0.1"
model = KittenTTS(_MODEL_ID)
# Voice identifiers offered in the dropdown. They follow the pattern
# "expr-voice-<number>-<suffix>" for numbers 2 through 5, with the "m"
# variant listed before the "f" variant of each number.
AVAILABLE_VOICES = [
    f"expr-voice-{number}-{suffix}"
    for number in range(2, 6)
    for suffix in ("m", "f")
]
# Sample rate (Hz) reported to the Gradio Audio component with every clip.
_OUTPUT_SAMPLE_RATE = 24000


def generate_speech(text, voice):
    """Generate speech from text using the module-level KittenTTS ``model``.

    Args:
        text: The text to speak. ``None``, empty and whitespace-only values
            are rejected with a prompt instead of being sent to the model.
        voice: Voice identifier, forwarded as ``voice=`` to ``model.generate``.

    Returns:
        A ``(audio, status)`` tuple matching the two Gradio outputs.
        ``audio`` is ``(sample_rate, samples)`` on success and ``None`` when
        nothing was generated; ``status`` is a human-readable message.
        Failures never raise: any exception from generation or
        post-processing is reported through ``status``.
    """
    # Guard against None as well as blank text; calling .strip() on None
    # would raise outside the try block below.
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    try:
        audio = np.asarray(model.generate(text, voice=voice))

        # Collapse multi-channel output to mono by averaging over axis 1.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # An empty result would otherwise surface as an opaque NumPy
        # reduction error from np.max below.
        if audio.size == 0:
            return None, "❌ Error generating speech: the model returned no audio."

        # Peak-normalize; all-zero (silent) audio is passed through as-is
        # to avoid dividing by zero.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        # Gradio's Audio component takes (sample_rate, audio_data).
        return (
            (_OUTPUT_SAMPLE_RATE, audio),
            f"✅ Successfully generated speech with voice: {voice}",
        )
    except Exception as e:
        # UI boundary: report the failure in the status box rather than
        # letting the request crash.
        return None, f"❌ Error generating speech: {str(e)}"
# Build the UI with gr.Interface (rather than gr.Blocks). The two inputs are
# passed to generate_speech in order (text, voice), and its two return values
# fill the audio player and the status box.
_text_input = gr.Textbox(
    label="Enter your text",
    placeholder="Type your text here...",
    lines=3,
)
_voice_input = gr.Dropdown(
    choices=AVAILABLE_VOICES,
    value=AVAILABLE_VOICES[1],
    label="Select Voice",
)
_audio_output = gr.Audio(label="Generated Audio")
_status_output = gr.Textbox(label="Status", interactive=False)

# Each example row is [text, voice], in the same order as the inputs.
_EXAMPLES = [
    ["Hello! This is a demonstration of the KittenTTS model.", "expr-voice-2-f"],
    ["The quick brown fox jumps over the lazy dog.", "expr-voice-2-m"],
    ["Welcome to our high-quality text-to-speech system.", "expr-voice-3-f"],
]

# NOTE(review): the leading characters of the title look like a mis-decoded
# emoji; confirm the intended glyph before changing the string.
demo = gr.Interface(
    fn=generate_speech,
    inputs=[_text_input, _voice_input],
    outputs=[_audio_output, _status_output],
    title="🎀 KittenTTS - High Quality Text-to-Speech",
    description="Generate natural-sounding speech from text using the KittenTTS model",
    examples=_EXAMPLES,
)
# Start the web server only when this file is run as a script, not when it
# is imported.
if __name__ == "__main__":
    # server_name "0.0.0.0" listens on every network interface; the port is
    # fixed at 7860.
    # NOTE(review): share=True additionally requests a public Gradio share
    # link; confirm that exposure is intended.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)