"""Gradio front end for Tortoise-TTS voice cloning.

Running this script clones and installs Tortoise-TTS (if needed), then serves
a web UI that synthesizes three candidate clips for the entered text.
"""

import os
import subprocess
import sys

REPO_URL = "https://github.com/neonbjb/tortoise-tts.git"
REPO_DIR = "tortoise-tts"

# Pseudo-voices offered in the dropdowns in addition to the on-disk voices.
VOICE_OPTIONS = [
    "random",  # special option for random voice
    "custom_voice",  # special option for custom voice
    "disabled",  # special option for disabled voice
]

EMOTION_NONE = "None/Custom"
# Sample rate the UI reports for voice samples / generated audio.
VOICE_SAMPLE_RATE = 22050
OUTPUT_SAMPLE_RATE = 24000


def setup_environment():
    """Clone Tortoise-TTS if missing, install it and Gradio, and chdir into it.

    Side effect: the process working directory becomes the cloned repository;
    ``main`` relies on that to list ``tortoise/voices``.

    Raises:
        subprocess.CalledProcessError: if git or any install step fails.
    """
    if not os.path.exists(REPO_DIR):
        subprocess.run(["git", "clone", REPO_URL, REPO_DIR], check=True)

    os.chdir(REPO_DIR)

    install_steps = (
        [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
        [sys.executable, "setup.py", "install"],
        [sys.executable, "-m", "pip", "install", "gradio"],
    )
    for command in install_steps:
        subprocess.run(command, check=True)


def build_prompt_text(text, emotion, prompt):
    """Return *text* prefixed with the bracketed emotion or custom prompt.

    A selected emotion wins over the free-form prompt; a blank (or missing)
    prompt leaves the text unchanged.
    """
    if emotion != EMOTION_NONE:
        return f"[I am really {emotion.lower()},] {text}"
    if prompt and prompt.strip():
        return f"[{prompt},] {text}"
    return text


def select_voice_names(voice, voice_b, voice_c):
    """Return the selected voices that must be loaded by name, in slot order.

    ``custom_voice`` (supplied via recorded audio), ``disabled`` and empty
    selections are dropped from every slot.
    """
    not_loadable = ("custom_voice", "disabled")
    return [
        name
        for name in (voice, voice_b, voice_c)
        if name and name not in not_loadable
    ]


def main():
    """Install dependencies, build the Gradio interface and launch it."""
    setup_environment()

    # Imported only after setup so a fresh machine can install them first.
    import gradio as gr
    import torchaudio

    try:
        from tortoise.api import TextToSpeech
        # load_voice/load_voices are module functions, not TextToSpeech methods.
        from tortoise.utils.audio import load_voice, load_voices
    except ImportError as e:
        raise ImportError(
            "Tortoise TTS not found. Make sure it is correctly installed."
        ) from e

    tts = TextToSpeech()

    def load_custom_clip(path):
        """Load the recorded clip as mono audio at VOICE_SAMPLE_RATE."""
        waveform, sample_rate = torchaudio.load(path)
        if waveform.shape[0] > 1:
            # Downmix multi-channel recordings to a single channel.
            waveform = waveform.mean(dim=0, keepdim=True)
        if sample_rate != VOICE_SAMPLE_RATE:
            waveform = torchaudio.functional.resample(
                waveform, sample_rate, VOICE_SAMPLE_RATE
            )
        return waveform

    def inference(text, emotion, prompt, voice, mic_audio, voice_b, voice_c, preset, seed):
        """Generate three candidates; return (voice sample, cand. 1, 2, 3).

        The voice sample is ``None`` when no raw reference clip is available.

        Raises:
            gr.Error: custom voice chosen without audio, or no voice selected.
        """
        text = build_prompt_text(text, emotion, prompt)
        names = select_voice_names(voice, voice_b, voice_c)

        custom_clip = None
        if voice == "custom_voice":
            if mic_audio is None:
                raise gr.Error("Please provide audio from mic when choosing custom voice")
            custom_clip = load_custom_clip(mic_audio)
        elif not names:
            raise gr.Error("Please select at least one voice")

        if len(names) > 1:
            voice_samples, conditioning_latents = load_voices(names)
        elif names:
            voice_samples, conditioning_latents = load_voice(names[0])
        else:
            voice_samples, conditioning_latents = None, None

        if custom_clip is not None:
            # voice_samples may be None here (presumably for "random" or
            # latent-only voices -- confirm against tortoise.utils.audio).
            voice_samples = [*(voice_samples or []), custom_clip]

        gen, _ = tts.tts_with_preset(
            text,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
            preset=preset,
            use_deterministic_seed=None if seed is None else int(seed),
            return_deterministic_state=True,
            k=3,
        )

        sample = None
        if voice_samples:
            sample = (VOICE_SAMPLE_RATE, voice_samples[0].squeeze().cpu().numpy())

        return (
            sample,
            (OUTPUT_SAMPLE_RATE, gen[0].squeeze().cpu().numpy()),
            (OUTPUT_SAMPLE_RATE, gen[1].squeeze().cpu().numpy()),
            (OUTPUT_SAMPLE_RATE, gen[2].squeeze().cpu().numpy()),
        )

    # Listed once (relative to the repo root set by setup_environment) and
    # sorted so the dropdown order is stable.
    voice_choices = sorted(os.listdir(os.path.join("tortoise", "voices"))) + VOICE_OPTIONS

    # NOTE: the inputs are positional -- their order must match the parameter
    # order of inference().
    interface = gr.Interface(
        fn=inference,
        inputs=[
            gr.Textbox(lines=4, label="Text:"),
            gr.Radio(
                ["None/Custom", "Happy", "Sad", "Angry", "Disgusted", "Arrogant"],
                value="None/Custom",
                label="Select emotion:",
            ),
            gr.Textbox(lines=1, label="Enter prompt if [Custom] emotion:"),
            gr.Dropdown(
                choices=voice_choices,
                value="angie",  # Default voice
                label="Select voice:",
            ),
            gr.Audio(label="Record voice (when selected custom_voice):", type="filepath"),
            gr.Dropdown(
                choices=voice_choices,
                value="disabled",
                label="(Optional) Select second voice:",
            ),
            gr.Dropdown(
                choices=voice_choices,
                value="disabled",
                label="(Optional) Select third voice:",
            ),
            gr.Radio(
                ["ultra_fast", "fast", "standard", "high_quality"],
                value="fast",
                label="Preset mode:",
            ),
            gr.Number(value=0, precision=0, label="Seed (for reproducibility):"),
        ],
        outputs=[
            gr.Audio(label="Sample of selected voice (first):"),
            gr.Audio(label="Output [Candidate 1]:"),
            gr.Audio(label="Output [Candidate 2]:"),
            gr.Audio(label="Output [Candidate 3]:"),
        ],
        title="RJ VOICE CLONING",
        description="RJ VOICE CLONING",
        css=".gradio-container { background-color: black; color: orange; }",
    )

    interface.launch(share=True)


if __name__ == "__main__":
    main()