Spaces:
Runtime error
Runtime error
candlend
committed on
Commit
•
5dfa344
1
Parent(s):
8de4a8e
preprocess_text
Browse files - vits/vits_inferencer.py +21 -2
vits/vits_inferencer.py
CHANGED
@@ -24,13 +24,32 @@ default_noise_scale = 0.667
|
|
24 |
default_noise_scale_w = 0.8
|
25 |
default_length_scale = 1
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
def get_text(text, hps):
|
|
|
28 |
text_norm = text_to_sequence(text, hps.data.text_cleaners)
|
29 |
if hps.data.add_blank:
|
30 |
text_norm = commons.intersperse(text_norm, 0)
|
31 |
text_norm = torch.LongTensor(text_norm)
|
32 |
return text_norm
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
class VitsInferencer:
|
35 |
def __init__(self, hps_path, device="cpu"):
|
36 |
print("init")
|
@@ -65,8 +84,8 @@ class VitsInferencer:
|
|
65 |
length_scale = gr.Slider(minimum=0, maximum=3, value=default_length_scale, step=0.001, label="length_scale(数值越大输出音频越长)")
|
66 |
|
67 |
tts_input = gr.TextArea(
|
68 |
-
label="
|
69 |
-
value="
|
70 |
tts_submit = gr.Button("合成", variant="primary")
|
71 |
tts_output = gr.Audio(label="Output")
|
72 |
gr.HTML('''
|
|
|
24 |
default_noise_scale_w = 0.8
|
25 |
default_length_scale = 1
|
26 |
|
27 |
+
replace_list = [
|
28 |
+
("candle", "刊豆"),
|
29 |
+
("end", "按的"),
|
30 |
+
("hoshimi", "吼西咪"),
|
31 |
+
("mua", "木啊"),
|
32 |
+
("hsm", "吼西咪"),
|
33 |
+
("ho", "齁"),
|
34 |
+
("na", "呐"),
|
35 |
+
("shi", "西"),
|
36 |
+
("mi", "咪"),
|
37 |
+
]
|
38 |
+
|
39 |
def get_text(text, hps):
|
40 |
+
text = preprocess_text(text)
|
41 |
text_norm = text_to_sequence(text, hps.data.text_cleaners)
|
42 |
if hps.data.add_blank:
|
43 |
text_norm = commons.intersperse(text_norm, 0)
|
44 |
text_norm = torch.LongTensor(text_norm)
|
45 |
return text_norm
|
46 |
|
47 |
+
def preprocess_text(text):
|
48 |
+
text = text.lower()
|
49 |
+
for src, dst in replace_list:
|
50 |
+
text = text.replace(src, dst)
|
51 |
+
return text
|
52 |
+
|
53 |
class VitsInferencer:
|
54 |
def __init__(self, hps_path, device="cpu"):
|
55 |
print("init")
|
|
|
84 |
length_scale = gr.Slider(minimum=0, maximum=3, value=default_length_scale, step=0.001, label="length_scale(数值越大输出音频越长)")
|
85 |
|
86 |
tts_input = gr.TextArea(
|
87 |
+
label="请输入文本(目前只支持汉字、单个英文字母和极个别专有名词,可以使用常用符号和空格来改变语调和停顿,请勿一次性输入过长文本)",
|
88 |
+
value="这里是爱喝奶茶,穿得也像奶茶魅力点是普通话二乙的星弥Hoshimi,晚上Ho")
|
89 |
tts_submit = gr.Button("合成", variant="primary")
|
90 |
tts_output = gr.Audio(label="Output")
|
91 |
gr.HTML('''
|