Applio-V3 / tabs /tts /tts.py
freshyfresco's picture
Upload 125 files (#2)
938411e verified
raw
history blame
9.93 kB
import os, sys
import gradio as gr
import regex as re
import json
import shutil
import datetime
import random
from core import (
run_tts_script,
)
from assets.i18n.i18n import I18nAuto
i18n = I18nAuto()
# Working directory of the running process; also made importable.
now_dir = os.getcwd()
sys.path.append(now_dir)

# Folders scanned below for voice models / index files and for audio files.
model_root = os.path.join(now_dir, "logs")
audio_root = os.path.join(now_dir, "assets", "audios")

# File-name suffixes treated as audio when scanning ``audio_root``.
sup_audioext = {
    "wav",
    "mp3",
    "flac",
    "ogg",
    "opus",
    "m4a",
    "mp4",
    "aac",
    "alac",
    "wma",
    "aiff",
    "webm",
    "ac3",
}


def _collect_paths(top, accept):
    """Walk *top* bottom-up and return joined paths for which *accept* is true.

    *accept* is called as ``accept(dirpath, filename)`` for every file found.
    """
    return [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(top, topdown=False)
        for filename in filenames
        if accept(dirpath, filename)
    ]


# Model weights: .pth / .onnx files, excluding those prefixed "G_" or "D_".
names = _collect_paths(
    model_root,
    lambda _dirpath, filename: filename.endswith((".pth", ".onnx"))
    and not filename.startswith(("G_", "D_")),
)

# Index files, skipping any whose name contains "trained".
indexes_list = _collect_paths(
    model_root,
    lambda _dirpath, filename: filename.endswith(".index")
    and "trained" not in filename,
)

# Audio files sitting directly in ``audio_root`` that are not "_output" files.
audio_paths = _collect_paths(
    audio_root,
    lambda dirpath, filename: filename.endswith(tuple(sup_audioext))
    and dirpath == audio_root
    and "_output" not in filename,
)
def change_choices():
    """Rescan the model and audio folders and build dropdown refresh payloads.

    Returns:
        A 3-tuple of ``{"choices": [...], "__type__": "update"}`` dicts, in
        this order: model weight files, index files, audio files. Each
        ``choices`` list is sorted.
    """

    def scan(top, keep):
        # Bottom-up walk of ``top``; ``keep(folder, entry)`` decides inclusion.
        return sorted(
            os.path.join(folder, entry)
            for folder, _, entries in os.walk(top, topdown=False)
            for entry in entries
            if keep(folder, entry)
        )

    def is_model(_folder, entry):
        # .pth / .onnx files, except those prefixed "G_" or "D_".
        if not entry.endswith((".pth", ".onnx")):
            return False
        return not entry.startswith(("G_", "D_"))

    def is_index(_folder, entry):
        return entry.endswith(".index") and "trained" not in entry

    def is_source_audio(folder, entry):
        # Only files directly inside ``audio_root`` that are not "_output" files.
        return (
            entry.endswith(tuple(sup_audioext))
            and folder == audio_root
            and "_output" not in entry
        )

    model_choices = scan(model_root, is_model)
    index_choices = scan(model_root, is_index)
    audio_choices = scan(audio_root, is_source_audio)

    return (
        {"choices": model_choices, "__type__": "update"},
        {"choices": index_choices, "__type__": "update"},
        {"choices": audio_choices, "__type__": "update"},
    )
def get_indexes():
    """List the ``.index`` files found anywhere beneath ``model_root``.

    Files whose name contains ``"trained"`` are left out.

    Returns:
        A list of joined paths, or an empty string when no file qualifies.
    """
    found = []
    for folder, _subdirs, entries in os.walk(model_root):
        for entry in entries:
            if not entry.endswith(".index"):
                continue
            if "trained" in entry:
                continue
            found.append(os.path.join(folder, entry))
    if not found:
        return ""
    return found
def match_index(model_file: str, search_root: str = "") -> str:
    """Pick the most suitable ``.index`` file for a voice model.

    The model's file name (minus a trailing ``.pth``/``.onnx``) is compared
    against index files in two places: a folder named after the model inside
    the search root, and the search root itself. Every index inside the
    model's own folder qualifies; elsewhere the index file name must contain
    the model name (case-insensitive). Index files with ``"trained"`` in their
    name are ignored. Candidates are read from disk on every call, so index
    files created after this module was imported are considered too.

    Args:
        model_file: Path or file name of the selected model. ``None`` or an
            empty string means "no model".
        search_root: Folder to search. Defaults to the module-level
            ``model_root`` when empty.

    Returns:
        Path of the best candidate, preferring names without spaces and then
        the largest file; ``""`` when there is no model or no candidate.
    """
    if not model_file:
        # An empty name is a substring of every file name and would match all
        # index files, so bail out early.
        return ""

    root_dir = search_root or model_root

    # Group the alternation so the ``$`` anchor applies to both extensions;
    # only a trailing extension is stripped.
    stem_path = re.sub(r"\.(pth|onnx)$", "", model_file)
    model_file_name = os.path.basename(stem_path)
    if not model_file_name:
        return ""

    # Names ending in ``_e<digits>_s<digits>`` are reduced to the part before
    # that suffix.
    if re.match(r".+_e\d+_s\d+$", model_file_name):
        base_model_name = model_file_name.rsplit("_", 2)[0]
    else:
        base_model_name = model_file_name

    sid_directory = os.path.join(root_dir, base_model_name)
    wanted = (model_file_name.lower(), base_model_name.lower())

    candidates = []
    for directory in (sid_directory, root_dir):
        # Skip folders that do not exist instead of letting listdir raise.
        if not os.path.isdir(directory):
            continue
        # Anything inside the model's own folder counts as a match.
        folder_match = directory == sid_directory
        for filename in os.listdir(directory):
            if not filename.endswith(".index") or "trained" in filename:
                continue
            lowered = filename.lower()
            if folder_match or any(name in lowered for name in wanted):
                index_path = os.path.join(directory, filename)
                candidates.append(
                    (" " not in filename, os.path.getsize(index_path), index_path)
                )

    if not candidates:
        return ""
    # Rank by (no spaces, size); max() keeps the first of equally ranked items.
    return max(candidates, key=lambda item: (item[0], item[1]))[2]
def save_to_wav(record_button):
    """Move a file into ``audio_root`` under a timestamped ``.wav`` name.

    Args:
        record_button: Path of the file to move, or ``None``.

    Returns:
        The destination path, or ``None`` when ``record_button`` is ``None``.
    """
    if record_button is None:
        return None

    # Second-resolution timestamp, e.g. "2024-01-31_12-00-59.wav".
    stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    destination = os.path.join(audio_root, os.path.basename(stamp + ".wav"))
    shutil.move(record_button, destination)
    return destination
def save_to_wav2(upload_audio):
    """Copy a file into ``audio_root``, replacing any file of the same name.

    Args:
        upload_audio: Path of the file to copy, or ``None``.

    Returns:
        The destination path inside ``audio_root``, or ``None`` when
        ``upload_audio`` is ``None``.

    Raises:
        FileNotFoundError: If ``upload_audio`` does not exist.
    """
    if upload_audio is None:
        # Nothing to copy; mirrors how save_to_wav treats a missing input.
        return None

    target_path = os.path.join(audio_root, os.path.basename(upload_audio))
    if os.path.exists(target_path):
        # If the source already is the destination file, deleting it first
        # would destroy the only copy. samefile() also raises for a missing
        # source before anything is removed.
        if os.path.samefile(upload_audio, target_path):
            return target_path
        os.remove(target_path)
    shutil.copy(upload_audio, target_path)
    return target_path
def delete_outputs():
    """Delete "_output" audio files under ``audio_root`` and show a notice.

    Every file below ``audio_root`` whose name ends with one of the
    ``sup_audioext`` suffixes and contains ``"_output"`` is removed. A Gradio
    info message is raised afterwards.
    """
    audio_suffixes = tuple(sup_audioext)
    for folder, _, filenames in os.walk(audio_root, topdown=False):
        for filename in filenames:
            if "_output" not in filename:
                continue
            if filename.endswith(audio_suffixes):
                os.remove(os.path.join(folder, filename))
    gr.Info("Outputs cleared!")
def tts_tab():
    """Build the TTS tab: model/index pickers, TTS inputs, settings and events.

    Creates the Gradio components and registers their event handlers. It is
    presumably meant to be called inside an active ``gr.Blocks`` context
    (confirm against the caller). Returns nothing.
    """
    # Preselect a random model when any were found at import time.
    default_weight = random.choice(names) if names else ""

    with gr.Row():
        with gr.Row():
            model_file = gr.Dropdown(
                label=i18n("Voice Model"),
                choices=sorted(names),
                interactive=True,
                value=default_weight,
                allow_custom_value=True,
            )
            # Start with the index that best matches the preselected model.
            best_default_index_path = match_index(model_file.value)
            index_file = gr.Dropdown(
                label=i18n("Index File"),
                choices=get_indexes(),
                value=best_default_index_path,
                interactive=True,
                allow_custom_value=True,
            )
        with gr.Column():
            refresh_button = gr.Button(i18n("Refresh"))
            unload_button = gr.Button(i18n("Unload Voice"))

            # "Unload" just clears the model dropdown.
            unload_button.click(
                fn=lambda: ({"value": "", "__type__": "update"}),
                inputs=[],
                outputs=[model_file],
            )

            # Picking a model re-selects the matching index file.
            model_file.select(
                fn=match_index,
                inputs=[model_file],
                outputs=[index_file],
            )

    # Voice list shipped as JSON; decode explicitly as UTF-8 rather than with
    # the platform's locale encoding.
    json_path = os.path.join("rvc", "lib", "tools", "tts_voices.json")
    with open(json_path, "r", encoding="utf-8") as file:
        tts_voices_data = json.load(file)

    short_names = [voice.get("ShortName", "") for voice in tts_voices_data]

    tts_voice = gr.Dropdown(
        label=i18n("TTS Voices"),
        choices=short_names,
        interactive=True,
        value=None,
    )

    tts_text = gr.Textbox(
        label=i18n("Text to Synthesize"),
        placeholder=i18n("Enter text to synthesize"),
        lines=3,
    )

    with gr.Accordion(i18n("Advanced Settings"), open=False):
        with gr.Column():
            output_tts_path = gr.Textbox(
                label=i18n("Output Path for TTS Audio"),
                placeholder=i18n("Enter output path"),
                value=os.path.join(now_dir, "assets", "audios", "tts_output.wav"),
                interactive=True,
            )

            output_rvc_path = gr.Textbox(
                label=i18n("Output Path for RVC Audio"),
                placeholder=i18n("Enter output path"),
                value=os.path.join(now_dir, "assets", "audios", "tts_rvc_output.wav"),
                interactive=True,
            )

            pitch = gr.Slider(
                minimum=-24,
                maximum=24,
                step=1,
                label=i18n("Pitch"),
                value=0,
                interactive=True,
            )
            filter_radius = gr.Slider(
                minimum=0,
                maximum=7,
                label=i18n(
                    "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
                ),
                value=3,
                step=1,
                interactive=True,
            )
            index_rate = gr.Slider(
                minimum=0,
                maximum=1,
                label=i18n("Search Feature Ratio"),
                value=0.75,
                interactive=True,
            )
            hop_length = gr.Slider(
                minimum=1,
                maximum=512,
                step=1,
                label=i18n("Hop Length"),
                value=128,
                interactive=True,
            )
        with gr.Column():
            f0method = gr.Radio(
                label=i18n("Pitch extraction algorithm"),
                choices=[
                    "pm",
                    "harvest",
                    "dio",
                    "crepe",
                    "crepe-tiny",
                    "rmvpe",
                ],
                value="rmvpe",
                interactive=True,
            )

    convert_button1 = gr.Button(i18n("Convert"))

    with gr.Row():  # Defines output info + output audio download after conversion
        vc_output1 = gr.Textbox(label=i18n("Output Information"))
        vc_output2 = gr.Audio(label=i18n("Export Audio"))

    # change_choices() returns three updates (models, indexes, audio files),
    # but this tab only has the first two dropdowns, so pass on exactly two.
    refresh_button.click(
        fn=lambda: change_choices()[:2],
        inputs=[],
        outputs=[model_file, index_file],
    )
    convert_button1.click(
        fn=run_tts_script,
        inputs=[
            tts_text,
            tts_voice,
            pitch,
            filter_radius,
            index_rate,
            hop_length,
            f0method,
            output_tts_path,
            output_rvc_path,
            model_file,
            index_file,
        ],
        outputs=[vc_output1, vc_output2],
    )