Spaces: Running on Zero
Update webui.py
Browse files
webui.py
CHANGED
@@ -64,6 +64,7 @@ def change_instruction(mode_checkbox_group):
|
|
64 |
def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
|
65 |
seed, stream, speed):
|
66 |
stream=False
|
|
|
67 |
if prompt_wav_upload is not None:
|
68 |
prompt_wav = prompt_wav_upload
|
69 |
elif prompt_wav_record is not None:
|
@@ -111,28 +112,36 @@ def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, pro
|
|
111 |
if instruct_text != '':
|
112 |
gr.Info('您正在使用3s极速复刻模式,预训练音色/instruct文本会被忽略!')
|
113 |
|
|
|
|
|
114 |
if mode_checkbox_group == '预训练音色':
|
115 |
logging.info('get sft inference request')
|
116 |
set_all_random_seed(seed)
|
117 |
for i in get_cosyvoice().inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
|
118 |
-
|
119 |
elif mode_checkbox_group == '3s极速复刻':
|
120 |
logging.info('get zero_shot inference request')
|
121 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
122 |
set_all_random_seed(seed)
|
123 |
for i in get_cosyvoice().inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):
|
124 |
-
|
125 |
elif mode_checkbox_group == '跨语种复刻':
|
126 |
logging.info('get cross_lingual inference request')
|
127 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
128 |
set_all_random_seed(seed)
|
129 |
for i in get_cosyvoice().inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
|
130 |
-
|
131 |
else:
|
132 |
logging.info('get instruct inference request')
|
133 |
set_all_random_seed(seed)
|
134 |
for i in get_cosyvoice().inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
# SDK模型下载
|
138 |
import platform
|
@@ -171,8 +180,9 @@ def get_cosyvoice():
|
|
171 |
with cosyvoice_lock:
|
172 |
if cosyvoice_instance is not None:
|
173 |
return cosyvoice_instance
|
174 |
-
|
175 |
-
|
|
|
176 |
|
177 |
def load_sft_options():
|
178 |
sound_choices=get_cosyvoice().list_avaliable_spks()
|
|
|
64 |
def generate_audio(tts_text, mode_checkbox_group, sft_dropdown, prompt_text, prompt_wav_upload, prompt_wav_record, instruct_text,
|
65 |
seed, stream, speed):
|
66 |
stream=False
|
67 |
+
global cosyvoice_instance, model_dir
|
68 |
if prompt_wav_upload is not None:
|
69 |
prompt_wav = prompt_wav_upload
|
70 |
elif prompt_wav_record is not None:
|
|
|
112 |
if instruct_text != '':
|
113 |
gr.Info('您正在使用3s极速复刻模式,预训练音色/instruct文本会被忽略!')
|
114 |
|
115 |
+
audio_data_list = []
|
116 |
+
|
117 |
if mode_checkbox_group == '预训练音色':
|
118 |
logging.info('get sft inference request')
|
119 |
set_all_random_seed(seed)
|
120 |
for i in get_cosyvoice().inference_sft(tts_text, sft_dropdown, stream=stream, speed=speed):
|
121 |
+
audio_data_list.append(i['tts_speech'].numpy().flatten())
|
122 |
elif mode_checkbox_group == '3s极速复刻':
|
123 |
logging.info('get zero_shot inference request')
|
124 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
125 |
set_all_random_seed(seed)
|
126 |
for i in get_cosyvoice().inference_zero_shot(tts_text, prompt_text, prompt_speech_16k, stream=stream, speed=speed):
|
127 |
+
audio_data_list.append(i['tts_speech'].numpy().flatten())
|
128 |
elif mode_checkbox_group == '跨语种复刻':
|
129 |
logging.info('get cross_lingual inference request')
|
130 |
prompt_speech_16k = postprocess(load_wav(prompt_wav, prompt_sr))
|
131 |
set_all_random_seed(seed)
|
132 |
for i in get_cosyvoice().inference_cross_lingual(tts_text, prompt_speech_16k, stream=stream, speed=speed):
|
133 |
+
audio_data_list.append(i['tts_speech'].numpy().flatten())
|
134 |
else:
|
135 |
logging.info('get instruct inference request')
|
136 |
set_all_random_seed(seed)
|
137 |
for i in get_cosyvoice().inference_instruct(tts_text, sft_dropdown, instruct_text, stream=stream, speed=speed):
|
138 |
+
audio_data_list.append(i['tts_speech'].numpy().flatten())
|
139 |
+
|
140 |
+
# 将所有的音频数据拼接起来
|
141 |
+
concatenated_audio_data = np.concatenate(audio_data_list)
|
142 |
+
|
143 |
+
# 返回拼接后的音频数据和目标采样率
|
144 |
+
return (target_sr, concatenated_audio_data)
|
145 |
|
146 |
# SDK模型下载
|
147 |
import platform
|
|
|
180 |
with cosyvoice_lock:
|
181 |
if cosyvoice_instance is not None:
|
182 |
return cosyvoice_instance
|
183 |
+
else:
|
184 |
+
cosyvoice_instance=CosyVoice(model_dir)
|
185 |
+
return cosyvoice_instance
|
186 |
|
187 |
def load_sft_options():
|
188 |
sound_choices=get_cosyvoice().list_avaliable_spks()
|