"""Gradio demo: clone a reference voice and synthesize speech with OpenVoice.

Module import performs all setup: cache locations are redirected under /tmp,
the Silero VAD model is pre-fetched into the torch.hub cache, and the
OpenVoice tone-color converter is constructed.
"""
import os
import time
import uuid

import torch

# --- Cache / config locations ------------------------------------------------
# Everything is redirected under /tmp. These variables MUST be set before the
# third-party imports further down: several libraries (e.g. huggingface_hub,
# numba, matplotlib) read them once while being imported, so assigning them
# afterwards has no effect on the already-imported modules.
os.environ["TORCH_HOME"] = "/tmp/torch"
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["HF_HUB_CACHE"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.environ["MPLCONFIGDIR"] = "/tmp"
os.environ["XDG_CACHE_HOME"] = "/tmp"
os.environ["XDG_CONFIG_HOME"] = "/tmp"
os.environ["NUMBA_DISABLE_CACHE"] = "1"

os.makedirs("/tmp/torch", exist_ok=True)
os.makedirs("/tmp/huggingface", exist_ok=True)
os.makedirs("/tmp/flagged", exist_ok=True)

# --- Warm the torch.hub cache --------------------------------------------------
# Best-effort: a failed download is reported but must not stop the app from
# starting, hence the deliberately broad except.
try:
    torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad", trust_repo=True)
    print("Silero VAD model preloaded successfully.")
except Exception as e:
    print(f"Failed to preload Silero VAD: {e}")

# --- Third-party imports (intentionally after the environment setup) ----------
from openvoice import se_extractor  # noqa: E402
from openvoice.api import ToneColorConverter  # noqa: E402
from openvoice.config import AttrDict  # noqa: E402,F401
import gradio as gr  # noqa: E402
import torchaudio  # noqa: E402,F401

# --- Model / output configuration ---------------------------------------------
ckpt_converter = "checkpoints/converter/config.json"
output_dir = "/tmp/outputs"
os.makedirs(output_dir, exist_ok=True)

# Pick the device explicitly. Upstream OpenVoice defaults the converter to
# "cuda:0" and asserts that CUDA is available, which aborts on CPU-only hosts.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
tone_color_converter = ToneColorConverter(ckpt_converter, device=device)
# NOTE(review): only the config is passed here; upstream OpenVoice also needs
# tone_color_converter.load_ckpt(<checkpoint.pth>) to load trained weights.
# The weights path is not visible in this file - confirm and add the call.

# Most recent reference-speaker embedding; overwritten by clone_and_speak().
ref_speaker_embed = None
def clone_and_speak(text, speaker_wav):
    """Synthesize ``text`` using the voice from an uploaded reference recording.

    Args:
        text: The text to speak.
        speaker_wav: Filesystem path of the reference audio; falsy when the
            user has not uploaded anything.

    Returns:
        Path of the generated ``.wav`` file inside ``output_dir``.

    Raises:
        gr.Error: If no reference audio was supplied.
    """
    global ref_speaker_embed

    if not speaker_wav:
        # This function feeds a gr.Audio output, which interprets a returned
        # string as an audio file path. Returning the message would therefore
        # fail instead of being shown; gr.Error surfaces it in the UI.
        raise gr.Error("Please upload a reference .wav file.")

    # Unique output name: second-resolution timestamp plus a short random
    # suffix so requests landing in the same second do not collide.
    timestamp = int(time.time())
    suffix = uuid.uuid4().hex[:6]
    output_wav = os.path.join(output_dir, f"output_{timestamp}_{suffix}.wav")

    # Keep the embedding in a local for the inference call so a concurrent
    # request cannot swap it out between extraction and synthesis.
    # NOTE(review): upstream se_extractor.get_se returns a tuple
    # (embedding, audio_name) - confirm what the installed version returns.
    speaker_embed = se_extractor.get_se(speaker_wav, tone_color_converter)
    # Still published module-wide for backward compatibility.
    ref_speaker_embed = speaker_embed

    # NOTE(review): upstream ToneColorConverter exposes convert(), not a
    # text-taking infer(); confirm the installed openvoice build provides
    # this method with these keyword arguments.
    tone_color_converter.infer(
        text=text,
        speaker_id="openvoice",
        language="en",
        ref_speaker=speaker_wav,
        ref_embed=speaker_embed,
        output_path=output_wav,
        top_k=10,
        temperature=0.3,
    )

    return output_wav
# Assemble the web UI: a text box and a reference-audio upload go in, a single
# synthesized audio clip comes out. Flagged samples are written under /tmp.
text_box = gr.Textbox(label="Enter Text")
reference_audio = gr.Audio(type="filepath", label="Upload a Reference Voice (.wav)")
synthesized_audio = gr.Audio(label="Synthesized Output")

demo = gr.Interface(
    fn=clone_and_speak,
    inputs=[text_box, reference_audio],
    outputs=synthesized_audio,
    flagging_dir="/tmp/flagged",
    title="Text to Voice using OpenVoice",
    description="Clone any voice (English) and generate speech using OpenVoice on CPU.",
)

# Start serving with Gradio's default launch settings.
demo.launch()
|