File size: 3,258 Bytes
990357a
fe48c10
4d74158
fe48c10
 
239000b
fe48c10
990357a
 
 
 
6d066e2
 
990357a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d066e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9e95cb
 
4d74158
 
 
8f9de4a
4d74158
f9e95cb
6d066e2
 
 
 
 
 
 
 
7fcb0ce
 
f9e95cb
 
 
 
4a4f8b1
f9e95cb
 
 
 
 
6d066e2
7f29d20
 
636f61d
f9e95cb
 
9243345
6d066e2
7fcb0ce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os
import gradio as gr
import random

os.system("pip install kantts -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html")
os.system("pip install numpy==1.22.0")

from modelscope.models.audio.tts import SambertHifigan
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# model_0

model_dir = os.path.abspath("./pretrain_work_dir")

custom_infer_abs = {
    'voice_name':
    'F7',
    'am_ckpt':
    os.path.join(model_dir, 'tmp_am', 'ckpt'),
    'am_config':
    os.path.join(model_dir, 'tmp_am', 'config.yaml'),
    'voc_ckpt':
    os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'ckpt'),
    'voc_config':
    os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan',
             'config.yaml'),
    'audio_config':
    os.path.join(model_dir, 'data', 'audio_config.yaml'),
    'se_file':
    os.path.join(model_dir, 'data', 'se', 'se.npy')
}
kwargs = {'custom_ckpt': custom_infer_abs}

model_id = SambertHifigan(os.path.join(model_dir, "orig_model"), **kwargs)

inference = pipeline(task=Tasks.text_to_speech, model=model_id)

# model_1

model_dir1 = os.path.abspath("./jay/pretrain_work_dir")

custom_infer_abs1 = {
    'voice_name':
    'F7',
    'am_ckpt':
    os.path.join(model_dir1, 'tmp_am', 'ckpt'),
    'am_config':
    os.path.join(model_dir1, 'tmp_am', 'config.yaml'),
    'voc_ckpt':
    os.path.join(model_dir1, 'orig_model', 'basemodel_16k', 'hifigan', 'ckpt'),
    'voc_config':
    os.path.join(model_dir1, 'orig_model', 'basemodel_16k', 'hifigan',
             'config.yaml'),
    'audio_config':
    os.path.join(model_dir1, 'data', 'audio_config.yaml'),
    'se_file':
    os.path.join(model_dir1, 'data', 'se', 'se.npy')
}
kwargs1 = {'custom_ckpt': custom_infer_abs1}

model_id1 = SambertHifigan(os.path.join(model_dir1, "orig_model"), **kwargs1)

inference1 = pipeline(task=Tasks.text_to_speech, model=model_id1)


# functions

def infer(text):
    output = inference(input=text)
    filename = str(random.randint(1, 1000000000000))
    
    with open(filename + "myfile.wav", mode='bx') as f:
        f.write(output["output_wav"])
    return filename + "myfile.wav"

def infer1(text):
    output = inference1(input=text)
    filename = str(random.randint(1, 1000000000000))
    
    with open(filename + "file.wav", mode='bx') as f:
        f.write(output["output_wav"])
    return filename + "file.wav"

app = gr.Blocks()

with app:
    gr.HTML("<center>"
            "<h1>🥳🎶🎡 - KanTTS中文声音克隆</h1>"
            "</center>")
    gr.Markdown("## <center>🌊 - 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")

   
    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(lines=5, label="请填写您想要转换的中文文本")
            with gr.Row():
                btn = gr.Button("使用AI娜娜的声音", variant="primary")
                btn1 = gr.Button("使用AI小杰的声音", variant="primary")
        out = gr.Audio(label="为您生成的专属音频", type="filepath")

        btn.click(fn=infer, inputs=[inp], outputs=[out])
        btn1.click(fn=infer1, inputs=[inp], outputs=[out])
        
app.launch(show_error=True)