File size: 5,394 Bytes
178b44a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03661d9
178b44a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c358fd
 
178b44a
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import subprocess
import random
import os
from pathlib import Path
import librosa
from scipy.io import wavfile
import numpy as np
import torch
import csv
import whisper
import gradio as gr
import soundfile as sf
import shutil
import datetime
from textwrap import dedent

# Runtime dependency bootstrap. The commands run once at import time, in this
# order, before the `sox` / `modelscope` imports that follow.
# NOTE(review): presumably those imports rely on these packages - confirm.
# The exit status of each command is ignored, so a failed install only
# surfaces later as an ImportError.
_PIP_BOOTSTRAP_COMMANDS = (
    "pip install --upgrade Cython==0.29.35",
    "pip install pysptk --no-build-isolation",
    "pip install kantts==1.0.1 -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html",
    "pip install tts-autolabel -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html",
)
for _pip_command in _PIP_BOOTSTRAP_COMMANDS:
    os.system(_pip_command)

import sox


from modelscope.tools import run_auto_label
from modelscope.models.audio.tts import SambertHifigan
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.audio.audio_utils import TtsTrainType


def _spectral_gate(spectrum, noise_level):
    """Zero out the STFT bins whose magnitude falls below a noise threshold.

    Bins whose magnitude is below the 95th percentile of all magnitudes are
    used as the noise estimate; the threshold is
    ``mean + noise_level * std`` of those magnitudes.

    Args:
        spectrum: Complex STFT array as returned by ``librosa.stft``.
        noise_level: Gate strength. ``None`` or any value <= 0 disables
            gating, matching the UI label ("no denoising when 0").

    Returns:
        The gated spectrum, or ``spectrum`` unchanged when gating is
        disabled or no noise bins could be selected.
    """
    if not noise_level or noise_level <= 0:
        return spectrum

    # All statistics are taken on magnitudes: percentile/mean/threshold of
    # the raw complex values have no meaningful ordering.
    magnitude = np.abs(spectrum)
    noise_magnitude = magnitude[magnitude < np.percentile(magnitude, 95)]
    if noise_magnitude.size == 0:
        # Empty or constant spectrum: nothing to estimate the noise from.
        return spectrum

    threshold = noise_magnitude.mean() + noise_level * noise_magnitude.std()
    return spectrum * (magnitude >= threshold)


def infer_custom(model_name, text, noise_level):
    """Synthesize ``text`` with a trained voice model and denoise the result.

    The model is loaded from ``/home/user/app/trained_model/<model_name>``,
    the synthesized audio is written to a temporary wav in the current
    working directory, passed through an STFT-domain noise gate, and saved
    as ``<timestamp><random>customfile.wav``.

    Args:
        model_name: Name of the model sub-directory to use.
        text: Text to synthesize.
        noise_level: Noise-gate strength; 0 disables the gate.

    Returns:
        Path of the generated wav file (relative to the working directory).

    Raises:
        gr.Error: If no model is selected or ``text`` is empty/blank.
        FileExistsError: If the generated temporary file name already exists.
    """
    # Fail early with a readable UI message instead of an opaque TypeError
    # from os.path.join(None) or a failure deep inside the TTS pipeline.
    if not model_name:
        raise gr.Error("请先选择一个模型")
    if not text or not text.strip():
        raise gr.Error("请输入要合成的文本")

    # Model directory chosen by the user.
    custom_model_dir = os.path.join("/home/user/app/trained_model/", model_name)
    hifigan_dir = os.path.join(custom_model_dir, 'orig_model', 'basemodel_16k', 'hifigan')

    custom_infer_abs = {
        'voice_name': 'F7',
        'am_ckpt': os.path.join(custom_model_dir, 'tmp_am', 'ckpt'),
        'am_config': os.path.join(custom_model_dir, 'tmp_am', 'config.yaml'),
        'voc_ckpt': os.path.join(hifigan_dir, 'ckpt'),
        'voc_config': os.path.join(hifigan_dir, 'config.yaml'),
        'audio_config': os.path.join(custom_model_dir, 'data', 'audio_config.yaml'),
        'se_file': os.path.join(custom_model_dir, 'data', 'se', 'se.npy'),
    }
    kwargs = {'custom_ckpt': custom_infer_abs}

    model_id = SambertHifigan(os.path.join(custom_model_dir, "orig_model"), **kwargs)
    inference = pipeline(task=Tasks.text_to_speech, model=model_id)
    output = inference(input=text)

    # Timestamp plus a random suffix keeps concurrent requests from sharing
    # a file name.
    date_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    filename = date_str + str(random.randint(1000, 9999))
    raw_path = filename + ".wav"
    denoised_path = filename + "customfile.wav"

    # 'x' refuses to overwrite an existing file with the same name.
    with open(raw_path, mode='bx') as f:
        f.write(output["output_wav"])

    try:
        y, sr = librosa.load(raw_path)
    finally:
        # The raw wav is only an intermediate; remove it even if loading fails.
        os.remove(raw_path)

    filtered_y = librosa.istft(_spectral_gate(librosa.stft(y), noise_level))
    sf.write(denoised_path, filtered_y, sr)

    return denoised_path


# Directory that is listed to populate the model dropdown; each entry is
# offered to the user as a selectable model name.
trained_model = "/home/user/app/trained_model/"



def update_model_dropdown(inp21):
    """Rebuild the model dropdown from the current contents of ``trained_model``.

    Args:
        inp21: The model name currently selected in the dropdown (may be
            ``None`` when nothing is selected).

    Returns:
        A ``gr.Dropdown`` whose choices are the sorted directory entries.
        The previous selection is kept only if it is still present;
        otherwise the selection is cleared.
    """
    # os.listdir returns entries in arbitrary order; sort for a stable list.
    model_list = sorted(os.listdir(trained_model))

    # Do not hand back a value that is no longer among the choices.
    selected = inp21 if inp21 in model_list else None

    return gr.Dropdown(choices=model_list, value=selected)


# Gradio UI: a single "voice synthesis" tab with a model dropdown, a denoise
# slider, a text box and an audio output, plus a short usage tutorial.
app = gr.Blocks()

with app:
    gr.Markdown("# <center>🥳🎶🎡 - Sambert原神角色声音克隆</center>")
    gr.Markdown("## <center>🌟 - 训练3分钟,推理10秒钟,中英真实拟声 </center>")
    # Closing </center> added so this heading matches the two above.
    gr.Markdown("### <center>🌊 - 基于SambertHifiGan项目修改而来,克隆原神角色声音</center>")

    with gr.Tabs():
        with gr.TabItem("声音合成"):
            with gr.Row():
                with gr.Column():
                    # Model selector, filled from the model directory at startup.
                    inp21 = gr.Dropdown(label="请选择一个模型", choices=os.listdir(trained_model))
                    # Denoise strength passed to infer_custom as noise_level.
                    inp22 = gr.Slider(label="降噪强度(为0时不降噪)", minimum=0, maximum=3, value=2)
                with gr.Column():
                    inp23 = gr.Textbox(label="请在这里填写您想合成的文本", placeholder="想说却还没说的 还很多...", lines=3,  interactive=True)
                with gr.Column():
                    out21 = gr.Audio(type="filepath", label="为您合成的专属音频", interactive=False)
            with gr.Row():
                btn21 = gr.Button("刷新模型列表")
                btn22 = gr.Button("一键推理", variant="primary")

            # Refresh button re-lists the model directory into the dropdown.
            btn21.click(update_model_dropdown, inp21, inp21)
            # Inference button: (model, text, noise level) -> audio file path.
            btn22.click(infer_custom, [inp21, inp23, inp22], out21)
            with gr.Accordion("📒 推理教程", open=True):
                # Plain string (no placeholders, so no f-prefix). Step 3 now
                # names the button by its actual label, "一键推理".
                tutorial_md = """ 如何推理声音: 
                  * 第一步,选择一个你想要使用的模型,如果训练后保存的模型无法找到请点击“刷新模型列表”
                  * 第二步,在文本框处输入你想要生成的文本,选择降噪强度,如果无需降噪请将强度设为0
                  * 第三步,点击“一键推理”按钮,生成克隆后的语音
                  * !!注意!!  不要生成会对个人以及组织造成侵害的内容
                  * 此处使用的降噪算法为传统降噪算法,非AI降噪
                  * 计划是更新原神里所有玩家可控角色的模型,尽量每两个工作日一更

                  """

                gr.Markdown(dedent(tutorial_md))

    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
    gr.HTML('''
        <div class="footer">
                    <p>🌊🏞️🎶 - 劳逸结合是不错 但也别放松过头 --刻晴
                    </p>
        </div>
    ''')

# Start the web server; show_error surfaces exceptions in the UI.
app.launch(show_error=True, share=False)