# Spaces: Running
import tempfile

import gradio as gr
from outetts.v0_1.interface import InterfaceHF
# Checkpoint identifier passed to the OuteTTS interface constructor.
_MODEL_ID = "OuteAI/OuteTTS-0.1-350M"

# Single interface instance created once at import time; generate_tts calls
# its create_speaker() and generate() methods on every request.
interface = InterfaceHF(_MODEL_ID)
def generate_tts(text, temperature, repetition_penalty, reference_audio, reference_text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Args:
        text: Text to synthesize.
        temperature: Sampling temperature forwarded to ``interface.generate``.
        repetition_penalty: Repetition penalty forwarded to
            ``interface.generate``.
        reference_audio: Path of a reference recording used for voice
            cloning, or a falsy value when none was supplied.
        reference_text: Transcription matching ``reference_audio``, or a
            falsy value when none was supplied.

    Returns:
        Path of a newly created ``.wav`` file containing the generated audio.
    """
    # A speaker is only built when BOTH the reference audio and its
    # transcription are present; otherwise generation runs with speaker=None.
    if reference_audio and reference_text:
        speaker = interface.create_speaker(reference_audio, reference_text)
    else:
        speaker = None

    output = interface.generate(
        text=text,
        speaker=speaker,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
    )

    # Reserve a unique file per call. A single fixed "output.wav" would be
    # shared by every request, so overlapping requests could overwrite each
    # other's audio before it is served. The handle is closed before saving so
    # the path can be reopened for writing on every platform.
    # NOTE: delete=False means this function does not remove the file itself.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = handle.name

    output.save(wav_path)
    return wav_path
# Markdown note rendered above the voice-cloning inputs.
_VOICE_CLONING_NOTE = """
**Note**: For voice cloning, both a reference audio file and its corresponding transcription must be provided.
If either the audio file or transcription is missing, the model will generate audio with random characteristics."""

# Declarative layout: inputs in the first column, generated audio in the second.
with gr.Blocks() as demo:
    gr.Markdown("# OuteTTS-0.1-350M Text-to-Speech Demo")

    with gr.Row():
        with gr.Column():
            # Text and sampling controls.
            text_input = gr.Textbox(
                label="Text to Synthesize",
                placeholder="Enter text here...",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.1,
                label="Temperature",
            )
            repetition_penalty = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.1,
                label="Repetition Penalty",
            )

            # Optional voice-cloning reference (audio is passed on as a file path).
            gr.Markdown(_VOICE_CLONING_NOTE)
            reference_audio = gr.Audio(
                label="Reference Audio (for voice cloning)",
                type="filepath",
            )
            reference_text = gr.Textbox(
                label="Reference Transcription Text (matching the audio)",
                placeholder="Enter reference text here if using voice cloning",
            )

            submit_button = gr.Button("Generate Speech")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Audio", type="filepath")

    # Component order here must match generate_tts's positional parameters.
    _tts_inputs = [
        text_input,
        temperature,
        repetition_penalty,
        reference_audio,
        reference_text,
    ]
    submit_button.click(fn=generate_tts, inputs=_tts_inputs, outputs=audio_output)

demo.launch()