Spaces:
Running
Running
File size: 5,414 Bytes
990357a fe48c10 4d74158 fe48c10 7ff2055 65e1de4 fe48c10 df68519 239000b fe48c10 990357a e0099fc 93c3d3d 6d066e2 990357a 6d066e2 f9e95cb 4d74158 8f9de4a 4d74158 f9e95cb 6d066e2 acda046 de47435 acda046 de47435 acda046 de47435 acda046 93c3d3d 8822333 1f8e9cf e0099fc 93c3d3d acda046 7fcb0ce f9e95cb f024cf5 e2d3317 f024cf5 f9e95cb 6d066e2 7f29d20 acda046 e2d3317 4b8c791 10f106d acda046 f9e95cb 9243345 acda046 10f106d a8923f2 7fcb0ce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import os
import gradio as gr
import random
# Runtime dependency bootstrap. These installs must finish before the
# modelscope / voicefixer imports that follow, so they stay at module level.
import subprocess
import sys


def _pip_install(*args):
    """Run ``pip install`` with *args* under the interpreter running this script.

    ``sys.executable -m pip`` guarantees the packages land in the environment
    that will import them, which a bare ``pip`` found on PATH does not.
    Failures stay non-fatal (as they were with ``os.system``) but are reported
    on stderr so a later ImportError can be traced back to the failed install.

    Args:
        *args: Command-line arguments placed after ``pip install``.

    Returns:
        The exit status of the pip process.
    """
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", *args],
        check=False,
    )
    if completed.returncode != 0:
        print(
            f"warning: pip install {' '.join(args)} "
            f"exited with status {completed.returncode}",
            file=sys.stderr,
        )
    return completed.returncode


# Order is preserved from the original script: pysptk is built without build
# isolation right after Cython is pinned.
_pip_install("--upgrade", "Cython==0.29.35")
_pip_install("pysptk", "--no-build-isolation")
_pip_install(
    "kantts",
    "-f",
    "https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html",
)
_pip_install("librosa==0.9.2")
_pip_install("numpy==1.22.0")
from modelscope.models.audio.tts import SambertHifigan
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from voicefixer import VoiceFixer
# Shared VoiceFixer instance, created once at import time; inference_denoise()
# calls its restore() method.
voicefixer = VoiceFixer()
def _custom_ckpt_paths(work_dir):
    """Return the ``custom_ckpt`` mapping for the voice stored under *work_dir*.

    Both voices use the same directory layout, so the mapping is built in one
    place instead of being spelled out once per voice.

    Args:
        work_dir: Absolute path of a ``pretrain_work_dir`` directory.

    Returns:
        A dict with the voice name and the acoustic-model, vocoder, audio
        config and ``se.npy`` paths, all located below *work_dir*.
    """
    hifigan_dir = os.path.join(work_dir, 'orig_model', 'basemodel_16k', 'hifigan')
    return {
        'voice_name': 'F7',
        'am_ckpt': os.path.join(work_dir, 'tmp_am', 'ckpt'),
        'am_config': os.path.join(work_dir, 'tmp_am', 'config.yaml'),
        'voc_ckpt': os.path.join(hifigan_dir, 'ckpt'),
        'voc_config': os.path.join(hifigan_dir, 'config.yaml'),
        'audio_config': os.path.join(work_dir, 'data', 'audio_config.yaml'),
        'se_file': os.path.join(work_dir, 'data', 'se', 'se.npy'),
    }


# model_0
model_dir = os.path.abspath("./pretrain_work_dir")
custom_infer_abs = _custom_ckpt_paths(model_dir)
kwargs = {'custom_ckpt': custom_infer_abs}
model_id = SambertHifigan(os.path.join(model_dir, "orig_model"), **kwargs)
# Text-to-speech pipeline called by infer().
inference = pipeline(task=Tasks.text_to_speech, model=model_id)

# model_1
model_dir1 = os.path.abspath("./jay/pretrain_work_dir")
custom_infer_abs1 = _custom_ckpt_paths(model_dir1)
kwargs1 = {'custom_ckpt': custom_infer_abs1}
model_id1 = SambertHifigan(os.path.join(model_dir1, "orig_model"), **kwargs1)
# Text-to-speech pipeline called by infer1().
inference1 = pipeline(task=Tasks.text_to_speech, model=model_id1)
# functions
def infer(text):
    """Synthesize *text* with the ``inference`` pipeline and save it as a WAV file.

    Args:
        text: Text to convert to speech.

    Returns:
        Path of the newly written WAV file, or ``None`` when *text* is
        ``None``, empty or whitespace only (nothing is synthesized then).
    """
    import tempfile  # local import keeps this function self-contained

    # Nothing to speak: skip the model call instead of failing inside it.
    if not text or not text.strip():
        return None

    output = inference(input=text)
    # NamedTemporaryFile picks a name that is guaranteed not to exist yet, so
    # the random-number naming and its possible FileExistsError are gone.
    # delete=False keeps the file on disk so the caller can read it later.
    with tempfile.NamedTemporaryFile(
        mode="wb", suffix="myfile.wav", dir=".", delete=False
    ) as wav_file:
        wav_file.write(output["output_wav"])
    return wav_file.name
def infer1(text):
    """Synthesize *text* with the ``inference1`` pipeline and save it as a WAV file.

    Args:
        text: Text to convert to speech.

    Returns:
        Path of the newly written WAV file, or ``None`` when *text* is
        ``None``, empty or whitespace only (nothing is synthesized then).
    """
    import tempfile  # local import keeps this function self-contained

    # Nothing to speak: skip the model call instead of failing inside it.
    if not text or not text.strip():
        return None

    output = inference1(input=text)
    # NamedTemporaryFile picks a name that is guaranteed not to exist yet, so
    # the random-number naming and its possible FileExistsError are gone.
    # delete=False keeps the file on disk so the caller can read it later.
    with tempfile.NamedTemporaryFile(
        mode="wb", suffix="file.wav", dir=".", delete=False
    ) as wav_file:
        wav_file.write(output["output_wav"])
    return wav_file.name
# upsample
import numpy as np
import torch
from hifi_gan_bwe import BandwidthExtender
from scipy.io.wavfile import write
# Longest clip, in seconds, that extend() processes; it truncates its input to
# int(MAX_LENGTH * sample_rate) samples.
MAX_LENGTH = 600.0
# Pretrained bandwidth-extension model, loaded once at import time; extend()
# calls it per channel and reads its sample_rate for the output file.
model = BandwidthExtender.from_pretrained("hifi-gan-bwe-10-42890e3-vctk-48kHz")
def extend(audio):
    """Run a clip through the bandwidth-extension model and save the result.

    Args:
        audio: ``(sample_rate, samples)`` pair, where ``samples`` is a NumPy
            array that is 1-D or has one column per channel, or ``None`` when
            no audio is available.
            NOTE(review): the 32767 scaling assumes 16-bit PCM samples; confirm
            against what the caller supplies.

    Returns:
        ``"upsample.wav"`` after the file has been written, or ``None`` when
        *audio* is ``None``.
    """
    # No clip to process: return early instead of failing on the unpack below.
    if audio is None:
        return None

    fs, samples = audio
    # Cap the amount of audio processed at MAX_LENGTH seconds.
    samples = samples[:int(MAX_LENGTH * fs)]
    samples = samples.astype(np.float32) / 32767.0
    if samples.ndim == 1:
        # Give 1-D input a channel axis so the per-channel loop below works.
        samples = samples[:, np.newaxis]

    with torch.no_grad():
        extended = np.stack(
            [model(torch.from_numpy(channel), fs) for channel in samples.T]
        ).T

    # Clip before the int16 cast: values outside [-1, 1] would otherwise wrap
    # around and come out as loud clicks.
    extended = np.clip(extended, -1.0, 1.0)
    pcm = (extended * 32767.0).astype(np.int16)
    out_rate = int(model.sample_rate)
    write("upsample.wav", out_rate, pcm)
    return "upsample.wav"
# denoise
def inference_denoise(audio):
    """Restore the WAV file at *audio* with VoiceFixer and write ``output.wav``.

    Args:
        audio: Path of the input WAV file, or ``None`` / empty when there is
            nothing to process.

    Returns:
        ``"output.wav"`` after restoration, or ``None`` when no input path was
        given.
    """
    # Without an input file there is nothing to restore; return early rather
    # than handing VoiceFixer an invalid path.
    if not audio:
        return None

    voicefixer.restore(
        input=audio,          # input wav file path
        output="output.wav",  # output wav file path
        cuda=False,           # whether to use gpu acceleration
        mode=0,               # You can try out mode 0, 1 to find out the best result
    )
    return "output.wav"
# Gradio front end: a text box with one button per voice on the left, three
# audio players plus the upsample / denoise buttons on the right.
with gr.Blocks() as app:
    gr.Markdown("# <center>🥳🎶🎡 - Sambert中文声音克隆</center>")
    gr.Markdown("## <center>🌟 - 训练3分钟,推理5秒钟,中英文自然发音、真实拟声 </center>")
    gr.Markdown("### <center>🌊 - 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")

    with gr.Row():
        # Left column: input text and the two voice buttons.
        with gr.Column():
            inp = gr.Textbox(lines=5, label="请填写您想要转换的中文文本")
            with gr.Row():
                btn = gr.Button("使用AI娜娜的声音", variant="primary")
                btn1 = gr.Button("使用AI小杰的声音", variant="primary")

        # Right column: generated, upsampled and denoised audio.
        with gr.Column():
            with gr.Row():
                out = gr.Audio(
                    label="为您生成的专属音频",
                    interactive=False,
                )
                out1 = gr.Audio(
                    label="更高采样率的专属音频",
                    type="filepath",
                    interactive=False,
                )
                out2 = gr.Audio(
                    label="降噪后的高采样率音频",
                    type="filepath",
                    interactive=False,
                )
            with gr.Row():
                btn2 = gr.Button("一键提高采样率")
                btn3 = gr.Button("一键降噪")

    # Wiring: both voice buttons fill `out`; `out` feeds the upsampler, whose
    # result in `out1` feeds the denoiser.
    btn.click(fn=infer, inputs=[inp], outputs=[out])
    btn1.click(fn=infer1, inputs=[inp], outputs=[out])
    btn2.click(fn=extend, inputs=[out], outputs=[out1])
    btn3.click(fn=inference_denoise, inputs=[out1], outputs=[out2])

    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
    gr.HTML('''
<div class="footer">
<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
</p>
</div>
''')

# show_error=True is passed through to launch() unchanged.
app.launch(show_error=True)