"""Gradio app that clones a voice with Coqui XTTS v2.

The user supplies text and a reference audio clip; the app synthesizes the
text in the reference speaker's voice and returns the resulting WAV file.
"""

import os
import tempfile
from typing import Optional
from unittest.mock import patch

import gradio as gr
import spaces
import torch
from TTS.api import TTS

# Must be set before the model is instantiated below.
os.environ["COQUI_TOS_AGREED"] = "1"

# Directory that receives the synthesized WAV files.
OUTPUT_DIR = "/tmp/"


def always_yes(*args, **kwargs):
    """Return ``'y'`` regardless of arguments (stand-in for ``input``)."""
    return 'y'


device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Any interactive prompt raised while the model loads is answered with 'y',
# because there is no terminal to answer it in a hosted app.
with patch('builtins.input', always_yes):
    tts = TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        progress_bar=False,
    ).to(device)


@spaces.GPU(enable_queue=True)
def generate_voice(text: str, audio_file_path: Optional[str]) -> str:
    """Synthesize *text* in the voice of the reference recording.

    Args:
        text: The text to speak. Must contain non-whitespace characters.
        audio_file_path: Filesystem path of the reference speaker audio, as
            delivered by the ``gr.Audio(type="filepath")`` input.

    Returns:
        Path of the generated WAV file inside ``OUTPUT_DIR``.

    Raises:
        gr.Error: If *text* is empty or no reference audio was provided.
    """
    # Fail early with a readable message instead of an opaque model error.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not audio_file_path:
        raise gr.Error("Please provide a reference audio clip.")

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Give every request its own output file. A single fixed file name would
    # let overlapping requests overwrite each other's audio.
    # NOTE(review): these files are not deleted here; confirm whether the
    # host cleans OUTPUT_DIR or add periodic cleanup.
    fd, output_path = tempfile.mkstemp(
        prefix="cloned_audio_", suffix=".wav", dir=OUTPUT_DIR
    )
    os.close(fd)  # The TTS library reopens the path itself.

    tts.tts_to_file(
        text,
        speaker_wav=audio_file_path,  # Reference clip, passed as a path string
        language="en",  # Output language is fixed to English
        file_path=output_path,
        split_sentences=True,
    )
    return output_path


# Define the Gradio interface
iface = gr.Interface(
    fn=generate_voice,
    inputs=[
        gr.Textbox(label="Input Text"),
        gr.Audio(label="Input Audio", type="filepath"),
    ],
    outputs=gr.Audio(label="Cloned Voice"),
    title="Voice Cloning TTS",
)

# Launch the interface (kept at module level, as in the original script)
iface.launch()