import spaces
import torch
import gradio as gr
from TTS.api import TTS
import os
from unittest.mock import patch
# Set the COQUI_TOS_AGREED flag in the process environment before the model
# is loaded further down in this file.
# NOTE(review): presumably this pre-accepts the Coqui model license so that
# startup never blocks on an interactive prompt — confirm against the TTS
# library's documentation.
os.environ["COQUI_TOS_AGREED"] = "1"


def always_yes(*args, **kwargs):
    """Answer 'y' no matter what is asked.

    Accepts and ignores any positional or keyword arguments so it can be
    substituted for a prompt function such as ``input``.
    """
    answer = 'y'
    return answer

# Run on the first CUDA device when torch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# While the model is being constructed, any call to the built-in input()
# is answered with 'y' by always_yes, so loading never waits on stdin.
with patch('builtins.input', always_yes):
    _xtts = TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        progress_bar=False,
    )
    tts = _xtts.to(device)

@spaces.GPU(enable_queue=True)
def generate_voice(text, audio_file_path):
    """Speak ``text`` in the voice of a reference recording.

    Args:
        text: The text to synthesize. Must contain at least one
            non-whitespace character.
        audio_file_path: Filesystem path of the reference audio clip; it is
            handed to the model as ``speaker_wav``. May arrive as ``None``
            when the user submitted the form without audio.

    Returns:
        Path of the newly written WAV file containing the synthesized speech.

    Raises:
        gr.Error: If ``text`` is blank or no reference audio was supplied.
    """
    # Function-scope import so this block does not rely on a file-level import.
    import tempfile

    # Fail early with a message the UI can display instead of an opaque
    # error from deep inside the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not audio_file_path:
        raise gr.Error("Please provide a reference audio clip to clone.")

    # Every request gets its own output file in the system temp directory.
    # A single fixed file name would let concurrent requests overwrite each
    # other's audio before it has been served back to the user.
    fd, output_path = tempfile.mkstemp(prefix="cloned_audio_", suffix=".wav")
    os.close(fd)  # only the reserved path is needed; the model writes the file

    try:
        tts.tts_to_file(
            text,
            speaker_wav=audio_file_path,  # reference clip, passed as a path string
            language="en",  # synthesis language is fixed to English
            file_path=output_path,
            split_sentences=True,
        )
    except Exception:
        # Synthesis failed: do not leave the empty placeholder file behind.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise
    return output_path

    
import gradio as gr
# Assemble the web UI: a text box and a reference-audio input feed
# generate_voice, and its result is shown in an audio output component.
text_input = gr.Textbox(label="Input Text")
reference_audio_input = gr.Audio(label="Input Audio", type="filepath")
cloned_audio_output = gr.Audio(label="Cloned Voice")

iface = gr.Interface(
    fn=generate_voice,
    inputs=[text_input, reference_audio_input],
    outputs=cloned_audio_output,
    title="Voice Cloning TTS",
)

# Start serving the interface.
iface.launch()