# NOTE(review): the original file began with non-Python residue scraped from the
# Hugging Face Spaces page (status lines, file size, git blame hashes, and a
# column of line numbers). It has been reduced to this comment header so the
# module parses; no program content was lost.
# Hugging Face Space: XTTS fine-tuned TTS demo (Gradio app).
import argparse
import os
import sys
import tempfile
import gradio as gr
import librosa.display
import numpy as np
import os
import torch
import torchaudio
import traceback
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
import spaces
def clear_gpu_cache():
    """Release cached CUDA memory so a newly loaded model has room; no-op on CPU."""
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
# Global handle for the currently loaded XTTS model; populated by load_model().
XTTS_MODEL = None
def load_model(choice):
    """Load one of the fine-tuned XTTS checkpoints into the global XTTS_MODEL.

    Args:
        choice: Voice identifier selected in the UI ("dingzhen" or "kobe").

    Returns:
        A status string shown in the Gradio `progress_load` label: a success
        message when the model loads, or an error message for an unknown choice.
    """
    global XTTS_MODEL
    clear_gpu_cache()
    # Per-voice (checkpoint, config, vocab) file paths.
    model_files = {
        "dingzhen": (
            "./finetune_models/run/training/GPT_XTTS_FT-July-04-2024_01+29PM-44c61c9/best_model.pth",
            "./finetune_models/run/training/XTTS_v2.0_original_model_files/config.json",
            "./finetune_models/run/training/XTTS_v2.0_original_model_files/vocab.json",
        ),
        "kobe": (
            "./finetune_models_kobe/run/training/GPT_XTTS_FT-July-05-2024_09+09AM-44c61c9/best_model.pth",
            "./finetune_models_kobe/run/training/XTTS_v2.0_original_model_files/config.json",
            "./finetune_models_kobe/run/training/XTTS_v2.0_original_model_files/vocab.json",
        ),
    }
    # Bug fix: previously an unrecognized `choice` left the path variables
    # unassigned and raised NameError before the error message could be
    # returned. Looking the paths up first makes the guard actually reachable.
    paths = model_files.get(choice)
    if paths is None:
        return "You need to run the previous steps or manually set the `XTTS checkpoint path`, `XTTS config path`, and `XTTS vocab path` fields !!"
    xtts_checkpoint, xtts_config, xtts_vocab = paths
    config = XttsConfig()
    config.load_json(xtts_config)
    XTTS_MODEL = Xtts.init_from_config(config)
    print("Loading XTTS model! ")
    XTTS_MODEL.load_checkpoint(config, checkpoint_path=xtts_checkpoint, vocab_path=xtts_vocab, speaker_file_path="./speakers_xtts.pth", use_deepspeed=False)
    if torch.cuda.is_available():
        XTTS_MODEL.cuda()
    print("模型已成功加载!")  # "Model loaded successfully!" (UI string, kept verbatim)
    return "模型已成功加载!"
@spaces.GPU
def run_tts(lang, tts_text, speaker_audio_file):
    """Synthesize `tts_text` in language `lang` using the loaded XTTS model.

    Args:
        lang: Language code from the UI dropdown (e.g. "zh", "en").
        tts_text: Text to synthesize.
        speaker_audio_file: Path to the reference audio selected in the UI.

    Returns:
        A (status_message, output_wav_path, reference_audio_path) triple for
        the Gradio outputs; the paths are None when the model is not loaded.
    """
    if XTTS_MODEL is None or not speaker_audio_file:
        return "您需要先执行第1步 - 加载模型", None, None
    # Collapse any embedded newlines/blank lines so we have a clean file path.
    speaker_audio_file = "".join(
        part for part in speaker_audio_file.strip().split("\n") if part != ""
    )
    gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
        audio_path=speaker_audio_file,
        gpt_cond_len=XTTS_MODEL.config.gpt_cond_len,
        max_ref_length=XTTS_MODEL.config.max_ref_len,
        sound_norm_refs=XTTS_MODEL.config.sound_norm_refs,
    )
    synthesis = XTTS_MODEL.inference(
        text=tts_text.strip(),
        language=lang,
        gpt_cond_latent=gpt_cond_latent,
        speaker_embedding=speaker_embedding,
        # Sampling parameters come straight from the model's own config.
        temperature=XTTS_MODEL.config.temperature,
        length_penalty=XTTS_MODEL.config.length_penalty,
        repetition_penalty=XTTS_MODEL.config.repetition_penalty,
        top_k=XTTS_MODEL.config.top_k,
        top_p=XTTS_MODEL.config.top_p,
    )
    # Persist the waveform to a temp .wav (delete=False: Gradio serves it later).
    synthesis["wav"] = torch.tensor(synthesis["wav"]).unsqueeze(0)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        out_path = fp.name
        torchaudio.save(out_path, synthesis["wav"], 24000)
    return "推理成功,快来听听吧!", out_path, speaker_audio_file
# Tee-style logger: echoes everything to the real terminal while also
# persisting it to a file, so the UI can re-read past output.
class Logger:
    """Write-through stdout proxy that mirrors output into a log file."""

    def __init__(self, filename="log.out"):
        self.log_file = filename
        self.terminal = sys.stdout
        self.log = open(self.log_file, "w")

    def write(self, message):
        """Send `message` to both the terminal and the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both underlying streams."""
        for stream in (self.terminal, self.log):
            stream.flush()

    def isatty(self):
        # Libraries probe this to decide on interactive behavior;
        # a redirected stream is never a TTY.
        return False
# redirect stdout and stderr to a file
# (both streams share one Logger so read_logs() sees everything in order)
sys.stdout = Logger()
sys.stderr = sys.stdout
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
import logging
# Route the logging module through the same redirected stdout as print().
logging.basicConfig(
    level=logging.WARNING,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
def read_logs():
    """Return the full contents of the redirected stdout log file.

    Assumes sys.stdout has been replaced by a Logger (see above), so it
    exposes a `log_file` path attribute.
    """
    sys.stdout.flush()
    log_path = sys.stdout.log_file
    with open(log_path) as handle:
        return handle.read()
# ---- Gradio UI definition ----
# Three-column layout: (1) model selection + load, (2) reference audio,
# text and language inputs, (3) synthesis progress and audio outputs.
# All user-facing strings are in Chinese and are part of the UI contract.
with gr.Blocks(title="GPT-SoVITS WebUI") as app:
    gr.Markdown("# <center>🌊💕🎶 XTTS 微调:2分钟语音,开启中日英16种语言真实拟声</center>")
    gr.Markdown("## <center>🌟 只需2分钟的语音,一键在线微调 最强多语种模型</center>")
    gr.Markdown("### <center>🤗 更多精彩,尽在[滔滔AI](https://www.talktalkai.com/);滔滔AI,为爱滔滔!💕</center>")
    with gr.Row():
        # Column 1: pick a fine-tuned voice and load its checkpoint.
        with gr.Column() as col1:
            choice = gr.Dropdown(label="请选择您喜欢的模型", value="dingzhen", choices=["dingzhen", "kobe"])
            progress_load = gr.Label(
                label="模型加载进程"
            )
            load_btn = gr.Button(value="1. 加载已训练好的模型", variant="primary")
        # Column 2: reference audio, text to synthesize, and its language.
        with gr.Column() as col2:
            speaker_reference_audio = gr.Dropdown(
                label="请选择一个参考音频",
                info="不同参考音频对应的合成效果不同。您可以尝试多次,每次选择一个音频路径",
                value="dingzhen1.wav",
                choices=["dingzhen1.wav", "dingzhen2.wav", "dingzhen3.wav", "dingzhen4.wav", "dingzhen5.wav", "dingzhen6.wav"]
            )
            tts_text = gr.Textbox(
                label="请填写语音合成的文本🍻",
                placeholder="想说却还没说的,还很多",
            )
            # Language codes supported by XTTS v2 (16 languages).
            tts_language = gr.Dropdown(
                label="请选择文本对应的语言",
                value="zh",
                choices=[
                    "en",
                    "es",
                    "fr",
                    "de",
                    "it",
                    "pt",
                    "pl",
                    "tr",
                    "ru",
                    "nl",
                    "cs",
                    "ar",
                    "zh",
                    "hu",
                    "ko",
                    "ja",
                ]
            )
            tts_btn = gr.Button(value="2. 开启AI语音之旅吧💕", variant="primary")
        # Column 3: status label plus the synthesized and reference audio.
        with gr.Column() as col3:
            progress_gen = gr.Label(
                label="语音合成进程"
            )
            tts_output_audio = gr.Audio(label="为您合成的专属音频🎶")
            reference_audio = gr.Audio(label="您使用的参考音频")
    # Step 1: load the selected model; status goes to progress_load.
    load_btn.click(
        fn=load_model,
        inputs=[
            choice
        ],
        outputs=[progress_load],
    )
    # Step 2: synthesize; returns (status, output wav, reference wav).
    tts_btn.click(
        fn=run_tts,
        inputs=[
            tts_language,
            tts_text,
            speaker_reference_audio,
        ],
        outputs=[progress_gen, tts_output_audio, reference_audio],
    )
    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。请自觉合规使用此程序,程序开发者不负有任何责任。</center>")
    gr.HTML('''
    <div class="footer">
                    <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
                    </p>
    </div>
    ''')
# Serve the app; share=True publishes a temporary public URL.
app.queue().launch(
    share=True,
    show_error=True,
)