Spaces:

Najma-Nur
/

ASR-model

Runtime error

File size: 1,264 Bytes

cc72f0f
 
e0613b2
d08e12e
 
 
e0613b2
 
 
 
 
 
 
 
 
 
 
 
d08e12e
e0613b2
 
 
 
 
 
4e3ae1a
 
d08e12e
4e3ae1a
 
e0613b2
d08e12e
e0613b2
d08e12e
4e3ae1a
 
d08e12e
 
a7a3a53
e0613b2


import gradio as gr
from TTS.api import TTS
import numpy as np
import soundfile as sf

# Load the XTTS v2 voice-cloning model once, at import time; the module-level
# `tts` object is what the request handler below uses for synthesis.
# NOTE(review): the first load presumably downloads the model, and Coqui may
# ask for licence confirmation for XTTS -- confirm how that is handled when
# this runs non-interactively.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
# CPU inference by default; switch to gpu=True when CUDA is available.
tts = TTS(model_name=model_name, gpu=False)

def clone_voice(reference_audio, text):
    """
    Generate cloned speech from a reference voice recording and text.

    Args:
        reference_audio: ``(sample_rate, samples)`` tuple, which is what a
            ``gr.Audio(type="numpy")`` input delivers, or ``None`` when the
            user has not uploaded anything.
        text: The text to speak in the reference voice.

    Returns:
        A ``(sample_rate, samples)`` tuple holding the generated speech, in
        the form ``gr.Audio`` accepts as an output value.

    Raises:
        ValueError: If no reference audio was supplied or ``text`` is empty
            or whitespace-only.
    """
    import os
    import tempfile

    # Validate up front so the user gets a clear message instead of an
    # opaque indexing/type error from deeper in the pipeline.
    if reference_audio is None:
        raise ValueError("Please upload a reference audio clip.")
    if not text or not text.strip():
        raise ValueError("Please enter some text to speak.")

    # Gradio's numpy audio format is (sample_rate, samples) -- rate first.
    sample_rate, samples = reference_audio

    # A per-call temporary directory keeps concurrent requests from
    # overwriting each other's files and cleans up automatically.
    with tempfile.TemporaryDirectory() as workdir:
        ref_audio_path = os.path.join(workdir, "reference.wav")
        output_path = os.path.join(workdir, "cloned_voice.wav")

        # soundfile expects write(file, data, samplerate).
        sf.write(ref_audio_path, samples, sample_rate)

        # Generate speech in the reference voice.
        tts.tts_to_file(
            text=text,
            speaker_wav=ref_audio_path,
            language="en",
            file_path=output_path,
        )

        # Read the result back before the temporary directory is removed.
        cloned_audio, sr = sf.read(output_path)

    return (sr, cloned_audio)

# Gradio Interface
description = "Upload an audio file of the voice you want to clone and provide text to generate speech in the same voice."

# Gradio 4 renamed the `source="upload"` keyword of gr.Audio to
# `sources=["upload"]`, and the old spelling raises TypeError there. Try the
# original keyword first so behaviour on older Gradio releases is unchanged,
# then fall back to the newer one.
try:
    _reference_audio_input = gr.Audio(source="upload", type="numpy", label="Reference Audio")
except TypeError:
    _reference_audio_input = gr.Audio(sources=["upload"], type="numpy", label="Reference Audio")

# Two inputs (reference clip, text) map positionally onto
# clone_voice(reference_audio, text); the returned audio feeds the output player.
iface = gr.Interface(
    fn=clone_voice,
    inputs=[_reference_audio_input, gr.Textbox(label="Text to Speak")],
    outputs=gr.Audio(label="Cloned Voice Output"),
    title="Real-Time Voice Cloning with XTTS",
    description=description,
)

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()