File size: 2,486 Bytes
05862c2
81b3ec7
 
 
 
 
05862c2
81b3ec7
 
 
 
05862c2
81b3ec7
 
 
 
05862c2
81b3ec7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05862c2
81b3ec7
 
 
 
 
05862c2
81b3ec7
 
 
05862c2
 
81b3ec7
 
05862c2
81b3ec7
05862c2
 
81b3ec7
 
 
 
 
 
 
 
 
 
 
 
05862c2
81b3ec7
 
 
 
05862c2
 
81b3ec7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import shutil
import tempfile
import time

import gradio as gr
import soundfile as sf
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor

# Load the Kokoro-TTS model and processor.
# Both objects are created with `from_pretrained` at module import time, so
# simply importing this file performs the load.
# NOTE(review): AutoModelForSpeechSeq2Seq is normally used for speech-to-text
# checkpoints — confirm that "hexgrad/Kokoro-TTS" is a model repository this
# Auto class can actually load.
model_name = "hexgrad/Kokoro-TTS"
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name)
processor = AutoProcessor.from_pretrained(model_name)

# Speaker labels offered in the UI dropdown; the selected label is forwarded
# to the processor through its `speaker` keyword in generate_tts.
# NOTE(review): these are placeholders — replace them with the speaker/voice
# names the model really supports.
speakers = ["Speaker 1", "Speaker 2", "Speaker 3"]  # Replace with actual speaker names

# Function to generate TTS
def generate_tts(text, speaker):
    """Synthesize ``text`` with the selected ``speaker`` and save it as a WAV file.

    Args:
        text: The text to convert to speech.
        speaker: Speaker label forwarded to the processor via its ``speaker``
            keyword.

    Returns:
        The path of the written ``.wav`` file on success. On failure a
        human-readable error message is returned instead; exceptions raised
        while processing, generating or writing are caught and converted to
        their message text.
    """
    # Reject empty / whitespace-only input up front instead of running the
    # model on it.
    if text is None or not str(text).strip():
        return "Error: input text is empty."

    try:
        # Preprocess input text
        inputs = processor(text, return_tensors="pt", speaker=speaker)

        # Generate speech without tracking gradients
        with torch.no_grad():
            speech = model.generate(**inputs)

        # Bring the result to host memory before converting; this is a no-op
        # for a tensor that already lives on the CPU.
        waveform = speech.detach().cpu().numpy()

        # NOTE(review): presumably generate() returns batch-first output; a
        # leading batch dimension of 1 is dropped so soundfile does not read
        # the array as (frames, channels) — confirm against the model.
        if waveform.ndim > 1 and waveform.shape[0] == 1:
            waveform = waveform[0]

        # mkstemp guarantees a unique file, so two requests arriving within
        # the same second can no longer overwrite each other's output.
        handle, output_file = tempfile.mkstemp(
            prefix=f"output_{int(time.time())}_", suffix=".wav"
        )
        os.close(handle)

        try:
            sf.write(output_file, waveform, samplerate=22050)  # Adjust samplerate if needed
        except Exception:
            # Do not leave an empty placeholder file behind when writing fails.
            os.remove(output_file)
            raise

        return output_file
    except Exception as e:
        return str(e)

# Gradio interface
def tts_app(text, speaker):
    """Gradio callback: run TTS and map the result onto the audio/status outputs.

    Args:
        text: Text entered by the user.
        speaker: Speaker chosen in the dropdown.

    Returns:
        A ``(audio_path, status)`` tuple: the generated file path plus a
        ``"Generated: ..."`` message on success, or ``(None, message)`` when
        generation failed.
    """
    result = generate_tts(text, speaker)

    # generate_tts reports failures as a plain message string. Checking the
    # suffix alone would hand an error message that happens to end in ".wav"
    # to the audio component as if it were a path, so the file must also exist.
    if result.endswith(".wav") and os.path.isfile(result):
        return result, f"Generated: {result}"
    return None, result

# Auto-naming system for downloads
def get_download_name():
    """Build a download file name stamped with the current Unix time in whole seconds."""
    epoch_seconds = int(time.time())
    return f"tts_output_{epoch_seconds}.wav"

# Download handler for the Gradio app
def _prepare_download(audio_path):
    """Copy the generated audio to an auto-named file for the download widget.

    Args:
        audio_path: File path supplied by the audio component, or ``None`` /
            empty when nothing has been generated yet.

    Returns:
        Path of a copy named by ``get_download_name()``, or ``None`` when
        there is no audio file to offer.
    """
    if not audio_path or not os.path.isfile(audio_path):
        return None
    # A fresh directory per download keeps the auto-generated name intact
    # without risking a clash with an earlier download.
    target = os.path.join(tempfile.mkdtemp(prefix="tts_download_"), get_download_name())
    shutil.copyfile(audio_path, target)
    return target


# Create the Gradio app
with gr.Blocks() as demo:
    gr.Markdown("# Kokoro-TTS v1.9: Long Input TTS Generation")

    with gr.Row():
        text_input = gr.Textbox(label="Input Text", placeholder="Enter your text here...", lines=10)
        speaker_dropdown = gr.Dropdown(label="Select Speaker", choices=speakers, value=speakers[0])

    generate_button = gr.Button("Generate TTS")

    with gr.Row():
        # type="filepath" makes the component pass a file path (rather than
        # raw samples) to the download handler when it is used as an input.
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status", placeholder="Generation status will appear here...")

    download_button = gr.Button("Download Audio")
    download_output = gr.File(label="Download Generated Audio")

    # Link functions to interface
    generate_button.click(
        fn=tts_app,
        inputs=[text_input, speaker_dropdown],
        outputs=[audio_output, status_output],
    )

    # Previously this handler returned only a made-up file name, which pointed
    # at a file that was never written; it now serves the generated audio.
    download_button.click(
        fn=_prepare_download,
        inputs=audio_output,
        outputs=download_output,
    )

# Launch the app
demo.launch()