import subprocess
import random
import os
from pathlib import Path
import librosa
from scipy.io import wavfile
import numpy as np
import torch
import csv
import whisper
import gradio as gr
import soundfile as sf
import shutil
import datetime
from textwrap import dedent
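# Runtime dependency setup: Cython is upgraded first so that pysptk can build
# (it is installed without build isolation), and kantts / tts-autolabel are
# pulled from the ModelScope release index rather than PyPI. These must be
# installed before the modelscope imports below will succeed.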
os.system("pip install --upgrade Cython==0.29.35")
os.system("pip install pysptk --no-build-isolation")
os.system("pip install kantts==1.0.1 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html")
os.system("pip install tts-autolabel -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html")
import sox
from modelscope.tools import run_auto_label
from modelscope.models.audio.tts import SambertHifigan
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.audio.audio_utils import TtsTrainType
def infer_custom(model_name, text, noise_level):
    """Synthesize `text` with the fine-tuned model `model_name`, optionally denoising the result."""
    # Resolve the model directory chosen by the user in the dropdown.
    custom_model_dir = os.path.join("/home/user/app/trained_model/", model_name)
    # Checkpoints and configs produced by fine-tuning, laid out as SambertHifigan expects.
    custom_infer_abs = {
        'voice_name': 'F7',
        'am_ckpt': os.path.join(custom_model_dir, 'tmp_am', 'ckpt'),
        'am_config': os.path.join(custom_model_dir, 'tmp_am', 'config.yaml'),
        'voc_ckpt': os.path.join(custom_model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'ckpt'),
        'voc_config': os.path.join(custom_model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'config.yaml'),
        'audio_config': os.path.join(custom_model_dir, 'data', 'audio_config.yaml'),
        'se_file': os.path.join(custom_model_dir, 'data', 'se', 'se.npy'),
    }
    kwargs = {'custom_ckpt': custom_infer_abs}
    # Build the voice model from the base checkpoint plus the custom checkpoints,
    # then wrap it in a ModelScope text-to-speech pipeline.
    model = SambertHifigan(os.path.join(custom_model_dir, "orig_model"), **kwargs)
    inference = pipeline(task=Tasks.text_to_speech, model=model)
    output = inference(input=text)
    # Write the raw synthesis output to a uniquely named wav file
    # (timestamp plus a random suffix to avoid collisions).
    now = datetime.datetime.now()
    date_str = now.strftime("%Y%m%d%H%M%S")
    rand_num = random.randint(1000, 9999)
    filename = date_str + str(rand_num)
    with open(filename + ".wav", mode='xb') as f:
        f.write(output["output_wav"])
    # Optional denoising: a simple spectral gate that zeroes STFT bins whose
    # magnitude falls below an estimated noise floor (traditional DSP, not an AI denoiser).
    if noise_level == 0:
        # The UI promises "no denoising at 0", so return the raw synthesis untouched.
        return filename + ".wav"
    y, sr = librosa.load(filename + ".wav", sr=None)  # sr=None keeps the model's native sample rate
    S = librosa.stft(y)
    mag = np.abs(S)
    # Estimate the noise floor from the quietest 95% of magnitude bins.
    noise = mag[mag < np.percentile(mag, 95)]
    noise_mean, noise_std = np.mean(noise), np.std(noise)
    # Keep only bins that exceed the noise floor by `noise_level` standard deviations.
    mask = np.ones_like(mag)
    mask[mag < noise_mean + noise_level * noise_std] = 0
    filtered_y = librosa.istft(mask * S)
    sf.write(filename + "customfile.wav", filtered_y, sr)
    os.remove(filename + ".wav")
    return filename + "customfile.wav"
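# Fine-tuned voices live one per sub-directory here; the folder names populate the model dropdown.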
trained_model = "/home/user/app/trained_model/"
def update_model_dropdown(current_value):
    # Re-scan the model directory and refresh the dropdown choices, keeping the current selection.
    model_list = os.listdir(trained_model)
    return gr.Dropdown(choices=model_list, value=current_value)
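# Gradio UI: choose a model, enter text and a denoise strength, then play the synthesized audio.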
app = gr.Blocks()
with app:
gr.Markdown("# <center>🥳🎶🎡 - Sambert原神角色声音克隆</center>")
gr.Markdown("## <center>🌟 - 训练3分钟,推理10秒钟,中英真实拟声 </center>")
gr.Markdown("### <center>🌊 - 基于SambertHifiGan项目修改而来,克隆原神角色声音")
with gr.Tabs():
        with gr.TabItem("Voice Synthesis"):
with gr.Row():
with gr.Column():
                    inp21 = gr.Dropdown(label="Select a model", choices=os.listdir(trained_model))
                    inp22 = gr.Slider(label="Denoise strength (0 = no denoising)", minimum=0, maximum=3, value=2)
with gr.Column():
                    inp23 = gr.Textbox(label="Enter the text you want to synthesize", placeholder="So much I want to say but haven't yet...", lines=3, interactive=True)
with gr.Column():
                    out21 = gr.Audio(type="filepath", label="Your synthesized audio", interactive=False)
with gr.Row():
                btn21 = gr.Button("Refresh model list")
                btn22 = gr.Button("Synthesize", variant="primary")
btn21.click(update_model_dropdown, inp21, inp21)
btn22.click(infer_custom, [inp21, inp23, inp22], out21)
            with gr.Accordion("📒 Inference Guide", open=True):
                _ = """
                How to synthesize speech:
                * Step 1: pick the model you want to use; if a model saved after training does not appear, click "Refresh model list".
                * Step 2: enter the text you want to generate and choose a denoise strength; set it to 0 if no denoising is needed.
                * Step 3: click the "Synthesize" button to generate the cloned speech.
                * !!Note!! Do not generate content that could harm any individual or organization.
                * The denoising used here is a traditional signal-processing algorithm, not AI denoising.
                * The plan is to release models for all playable Genshin Impact characters, updated roughly every two working days.
                """
gr.Markdown(dedent(_))
gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
    gr.HTML('''
    <div class="footer">
        <p>🌊🏞️🎶 - Balancing work and rest is good, but don't relax too much. --Keqing
        </p>
    </div>
    ''')
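# show_error surfaces exceptions in the browser; share=False disables the public Gradio share link.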
app.launch(show_error=True, share=False)