Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
c90b394
1
Parent(s):
f89f703
update gradio cached examples
Browse files
- .gitignore +2 -1
- tts/gradio_api.py +16 -1
.gitignore
CHANGED
|
@@ -1 +1,2 @@
|
|
| 1 |
-
checkpoints
|
|
|
|
|
|
| 1 |
+
checkpoints
|
| 2 |
+
official_test_case
|
tts/gradio_api.py
CHANGED
|
@@ -26,7 +26,7 @@ os.system('huggingface-cli download ByteDance/MegaTTS3 --local-dir ./checkpoints
|
|
| 26 |
CUDA_AVAILABLE = torch.cuda.is_available()
|
| 27 |
infer_pipe = MegaTTS3DiTInfer(device='cuda' if CUDA_AVAILABLE else 'cpu')
|
| 28 |
|
| 29 |
-
@spaces.GPU(duration=
|
| 30 |
def forward_gpu(file_content, latent_file, inp_text, time_step, p_w, t_w):
|
| 31 |
resource_context = infer_pipe.preprocess(file_content, latent_file)
|
| 32 |
wav_bytes = infer_pipe.forward(resource_context, inp_text, time_step=time_step, p_w=p_w, t_w=t_w)
|
|
@@ -36,6 +36,14 @@ def model_worker(input_queue, output_queue, device_id):
|
|
| 36 |
while True:
|
| 37 |
task = input_queue.get()
|
| 38 |
inp_audio_path, inp_npy_path, inp_text, infer_timestep, p_w, t_w = task
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
try:
|
| 40 |
convert_to_wav(inp_audio_path)
|
| 41 |
wav_path = os.path.splitext(inp_audio_path)[0] + '.wav'
|
|
@@ -48,6 +56,7 @@ def model_worker(input_queue, output_queue, device_id):
|
|
| 48 |
traceback.print_exc()
|
| 49 |
print(task, str(e))
|
| 50 |
output_queue.put(None)
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
def main(inp_audio, inp_npy, inp_text, infer_timestep, p_w, t_w, processes, input_queue, output_queue):
|
|
@@ -85,6 +94,12 @@ if __name__ == '__main__':
|
|
| 85 |
gr.Number(label="Intelligibility Weight", value=1.4),
|
| 86 |
gr.Number(label="Similarity Weight", value=3.0)], outputs=[gr.Audio(label="Synthesized Audio")],
|
| 87 |
title="MegaTTS3",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
description="Upload a speech clip as a reference for timbre, " +
|
| 89 |
"upload the pre-extracted latent file, "+
|
| 90 |
"input the target text, and receive the cloned voice. "+
|
|
|
|
| 26 |
CUDA_AVAILABLE = torch.cuda.is_available()
|
| 27 |
infer_pipe = MegaTTS3DiTInfer(device='cuda' if CUDA_AVAILABLE else 'cpu')
|
| 28 |
|
| 29 |
+
@spaces.GPU(duration=60)
|
| 30 |
def forward_gpu(file_content, latent_file, inp_text, time_step, p_w, t_w):
|
| 31 |
resource_context = infer_pipe.preprocess(file_content, latent_file)
|
| 32 |
wav_bytes = infer_pipe.forward(resource_context, inp_text, time_step=time_step, p_w=p_w, t_w=t_w)
|
|
|
|
| 36 |
while True:
|
| 37 |
task = input_queue.get()
|
| 38 |
inp_audio_path, inp_npy_path, inp_text, infer_timestep, p_w, t_w = task
|
| 39 |
+
|
| 40 |
+
if inp_npy_path is None:
|
| 41 |
+
raise gr.Error("Please provide .npy file")
|
| 42 |
+
if (inp_audio_path[:-4] != inp_npy_path[:-4]):
|
| 43 |
+
raise gr.Error(".npy and .wav mismatch")
|
| 44 |
+
if len(inp_text) > 200:
|
| 45 |
+
raise gr.Error("input text is too long")
|
| 46 |
+
|
| 47 |
try:
|
| 48 |
convert_to_wav(inp_audio_path)
|
| 49 |
wav_path = os.path.splitext(inp_audio_path)[0] + '.wav'
|
|
|
|
| 56 |
traceback.print_exc()
|
| 57 |
print(task, str(e))
|
| 58 |
output_queue.put(None)
|
| 59 |
+
raise gr.Error("Generation failed")
|
| 60 |
|
| 61 |
|
| 62 |
def main(inp_audio, inp_npy, inp_text, infer_timestep, p_w, t_w, processes, input_queue, output_queue):
|
|
|
|
| 94 |
gr.Number(label="Intelligibility Weight", value=1.4),
|
| 95 |
gr.Number(label="Similarity Weight", value=3.0)], outputs=[gr.Audio(label="Synthesized Audio")],
|
| 96 |
title="MegaTTS3",
|
| 97 |
+
examples=[
|
| 98 |
+
['./official_test_case/范闲.wav', './official_test_case/范闲.npy', "你好呀,我是范闲。我给你读一段清泉石上流。"],
|
| 99 |
+
['./official_test_case/周杰伦1.wav', './official_test_case/周杰伦1.npy', "有的时候嘛,我去台湾开演唱会的时候,会很喜欢来一碗卤肉饭的。"],
|
| 100 |
+
['./official_test_case/keep_app.wav', './official_test_case/keep_app.npy', "Let do some exercise and practice more."],
|
| 101 |
+
],
|
| 102 |
+
cache_examples=True,
|
| 103 |
description="Upload a speech clip as a reference for timbre, " +
|
| 104 |
"upload the pre-extracted latent file, "+
|
| 105 |
"input the target text, and receive the cloned voice. "+
|