Spaces:
Runtime error
Runtime error
File size: 4,877 Bytes
d59aeff c59c783 d59aeff e42c164 d59aeff c59c783 f954003 c59c783 d59aeff e42c164 d59aeff 498fbde e42c164 cffda07 e99101c 3e2bb0e e99101c e42c164 72542b5 1f3bc5d 2e04df8 e99101c e42c164 0d86152 e42c164 48971d8 58a5d8e 3e2bb0e 050bcde 3e2bb0e e42c164 f656c09 f29a4ca b913e3e ca2d513 d59aeff c59c783 43e698f a0907be |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import random
import re
import string
import threading
from pathlib import Path

import gradio as gr
import librosa
import numpy as np
from scipy.io.wavfile import write

from encoder import inference as encoder
from vocoder.hifigan import inference as gan_vocoder
from synthesizer.inference import Synthesizer
class Mandarin:
    """Voice-cloning pipeline: speaker encoder -> synthesizer -> HiFi-GAN vocoder.

    Typical use is ``setVoice(path)`` once per reference clip, then ``say(text)``
    as often as needed. ``say`` reads the embedding stored by ``setVoice``, so
    the two calls are not safe to interleave across threads on one instance.
    """

    # Synthesizers keyed by checkpoint path. Kept on the class (not as a local
    # in ``__init__``) so that a second instance reuses the loaded checkpoint.
    _synthesizers_cache = {}

    # Punctuation on which text is split into separately synthesized segments.
    _PUNCTUATION = "!,。、?!,.?::"
    _SPLIT_PATTERN = re.compile(r'[{}]+'.format(_PUNCTUATION))

    def __init__(self):
        """Resolve model paths and load the synthesizer, encoder and vocoder."""
        self.encoder_path = "encoder/saved_models/pretrained.pt"
        self.vocoder_path = "vocoder/saved_models/pretrained/g_hifigan.pt"
        self.config_fpath = "vocoder/hifigan/config_16k_.json"
        self.accent = "synthesizer/saved_models/普通话.pt"

        cache = Mandarin._synthesizers_cache
        synthesizer = cache.get(self.accent)
        if synthesizer is None:
            synthesizer = Synthesizer(Path(self.accent))
            cache[self.accent] = synthesizer
        self.current_synt = synthesizer

        encoder.load_model(Path(self.encoder_path))
        gan_vocoder.load_model(Path(self.vocoder_path), self.config_fpath)

    def setVoice(self, timbre):
        """Compute and store the speaker embedding for a reference recording.

        Args:
            timbre: Path of the reference audio file, passed to ``librosa.load``.
        """
        self.timbre = timbre
        wav, sample_rate = librosa.load(self.timbre)
        encoder_wav = encoder.preprocess_wav(wav, sample_rate)
        # Only the utterance-level embedding is kept; the two extra values
        # returned because of ``return_partials=True`` are discarded.
        self.embed, _, _ = encoder.embed_utterance(encoder_wav, return_partials=True)

    def say(self, text):
        """Synthesize ``text`` with the voice set by ``setVoice``.

        The text is split on newlines and punctuation; every segment is
        synthesized with the same embedding and the resulting spectrograms are
        joined along axis 1 before vocoding.

        Args:
            text: Text to speak.

        Returns:
            The ``(wav, sample_rate)`` pair produced by
            ``gan_vocoder.infer_waveform``.

        Raises:
            ValueError: If ``text`` holds nothing but punctuation/whitespace.
            AttributeError: If ``setVoice`` has not been called yet.
        """
        texts = self._split_text(text)
        if not texts:
            # Fail before touching the models; an empty batch would otherwise
            # only surface later as an opaque concatenation error.
            raise ValueError("text contains nothing to synthesize")

        embeds = [self.embed] * len(texts)
        specs = self.current_synt.synthesize_spectrograms(texts, embeds)
        spec = np.concatenate(specs, axis=1)
        wav, sample_rate = gan_vocoder.infer_waveform(spec)
        return wav, sample_rate

    @classmethod
    def _split_text(cls, text):
        """Return the non-empty, stripped segments of ``text``."""
        segments = []
        for line in text.split("\n"):
            for piece in cls._SPLIT_PATTERN.split(line):
                # Strip *before* testing so whitespace-only pieces are dropped
                # instead of being passed on as empty strings.
                piece = piece.strip()
                if piece:
                    segments.append(piece)
        return segments
def greet(audio, text, voice=None):
    """Clone the voice in ``audio`` and speak ``text`` (stateful Interface handler).

    Args:
        audio: Uploaded file object; its ``.name`` is used as the audio path.
        text: Text to synthesize.
        voice: ``Mandarin`` instance carried over from a previous call, or
            ``None`` on the first call, in which case one is created.

    Returns:
        ``(output_file, voice)``: path of the written WAV file and the
        ``Mandarin`` instance to keep as state for the next call.
    """
    print(f"Log print: audio name=[{audio.name}], text=[{text}]")
    first_call = voice is None
    if first_call:
        voice = Mandarin()
    # Always apply the clip from *this* request (as new_greet does); otherwise
    # a reused state object would keep speaking with the first speaker's voice.
    voice.setVoice(audio.name)
    if first_call:
        # One throwaway synthesis right after the models are loaded.
        voice.say("加载成功")
    wav, sample_rate = voice.say(text)
    output_file = "".join(random.sample(string.ascii_lowercase + string.digits, 11)) + ".wav"
    write(output_file, sample_rate, wav.astype(np.float32))
    return output_file, voice
# Serializes use of the shared module-level ``voice``: setVoice() stores the
# speaker embedding on the instance and say() reads it back, so two requests
# interleaving between those calls would speak with each other's timbre.
_VOICE_LOCK = threading.Lock()


def new_greet(audio, text):
    """Clone the voice in ``audio`` and speak ``text`` (Blocks handler).

    Uses the module-level ``voice`` object, which must exist before the first
    call.

    Args:
        audio: Path of the reference audio file.
        text: Text to synthesize.

    Returns:
        Path of the WAV file that was written.
    """
    audio_path = audio
    print(f"Log print: audio name=[{audio_path}], text=[{text}]")
    with _VOICE_LOCK:
        voice.setVoice(audio_path)
        wav, sample_rate = voice.say(text)
    output_file = "".join(random.sample(string.ascii_lowercase + string.digits, 11)) + ".wav"
    write(output_file, sample_rate, wav.astype(np.float32))
    return output_file
def main():
    """Launch the ``gr.Interface`` demo that drives ``greet`` with session state."""
    audio_in = gr.inputs.Audio(type="file")
    audio_out = gr.outputs.Audio(type="file")
    # The "state" slots carry greet()'s third argument / second return value.
    interface = gr.Interface(
        fn=greet,
        inputs=[audio_in, "text", "state"],
        outputs=[audio_out, "state"],
        title="Tacotron Zero-short Voice Clone (Chinese Version)",
    )
    interface.launch()
def new_main():
    """Build the Blocks UI around ``new_greet``, enable queuing and launch it."""
    sample_text = "大家好,我是正在搬砖的李昊,这是一段合成音频。"
    instructions = (
        "这是Zero-short语音克隆模型Tacotron的中文版本。<br>\n"
        "1. 上传想要克隆的人的一段语音,长度3~8秒即可。<br>\n"
        "2. 输入想要合成的文本<br>\n"
        "3. 点击Submit按钮,稍等30秒便可合成语音\n"
    )

    with gr.Blocks() as demo:
        gr.Markdown("# <center>Tacotron Zero-short Voice Clone (Chinese Version)</center>")
        gr.Markdown(instructions)

        with gr.Row():
            # Left column: reference clip, text and the submit button.
            with gr.Column():
                input_audio = gr.Audio(type="filepath", label="Source Audio:")
                input_text = gr.Textbox(value=sample_text, label="TTS Text:")
                with gr.Row():
                    submit = gr.Button(value="Submit", variant='primary')
            # Right column: synthesized result.
            with gr.Column():
                output_audio = gr.Audio(type="filepath", label="Output Audio:")

        gr.Markdown("Audio Examples:")
        handler_inputs = [input_audio, input_text]
        handler_outputs = [output_audio]
        gr.Examples(
            examples=[["exp/lihao_01.wav", sample_text]],
            inputs=handler_inputs,
            outputs=handler_outputs,
            fn=new_greet,
            cache_examples=True,
        )
        submit.click(new_greet, inputs=handler_inputs, outputs=handler_outputs)

    demo.queue(concurrency_count=20)
    demo.launch()
if __name__=="__main__":
    # Create the shared pipeline before building the UI: new_greet() reads
    # this module-level ``voice`` instead of receiving it as an argument.
    voice = Mandarin()
    # Disabled warm-up calls (there is no ``audio`` object at this point):
    # voice.setVoice(audio.name)
    # voice.say("加载成功")
    new_main()
|