"""Small Flask service that synthesizes speech with a SynthesizerTrn model.

``GET /tts?text=...`` runs the text through the model and returns the
resulting audio as a WAV payload.
"""
import io
import threading
import time

import torch
from flask import Flask, request
from scipy.io.wavfile import write

import commons
import utils
from models import SynthesizerTrn
from text import text_to_sequence
from text.symbols import symbols

# Sample rate written into the WAV header of every response.
# NOTE(review): presumably this has to match the rate the model was trained
# at (likely available in the hparams file) -- confirm before changing.
SAMPLE_RATE = 22050

app = Flask(__name__)

# Serializes access to the model: only one inference runs at a time, and
# concurrent requests are turned away instead of being queued (see ``tts``).
mutex = threading.Lock()


def get_text(text, hps):
    """Convert raw text into the LongTensor of symbol ids the model consumes.

    Args:
        text: Input string to synthesize.
        hps: Hyper-parameter object; ``hps.data.text_cleaners`` selects the
            cleaners and ``hps.data.add_blank`` controls blank insertion.

    Returns:
        A 1-D ``torch.LongTensor`` of symbol ids.
    """
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        # Interleave id 0 between the symbols of the sequence.
        text_norm = commons.intersperse(text_norm, 0)
    return torch.LongTensor(text_norm)


hps = utils.get_hparams_from_file("./configs/ljs_mb_istft_vits.json")
net_g = SynthesizerTrn(
    len(symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    **hps.model)
_ = net_g.eval()
# NOTE: checkpoint loading is currently disabled, so ``net_g`` serves with
# whatever weights the constructor produced. Re-enable to load trained weights.
# _ = utils.load_checkpoint("../tempbest.pth", net_g, None)


def tts(txt):
    """Synthesize ``txt`` and return the waveform, or ``None`` when busy.

    The model lock is taken without blocking: if another request is already
    running inference, this call returns ``None`` immediately rather than
    waiting.

    Args:
        txt: Text to synthesize.

    Returns:
        A NumPy float array holding the generated audio samples, or ``None``
        if the model was busy.

    Raises:
        Whatever ``get_text`` or ``net_g.infer`` raise; the lock is released
        before the exception propagates.
    """
    if not mutex.acquire(blocking=False):
        return None
    try:
        stn_tst = get_text(txt, hps)
        with torch.no_grad():
            # Add a leading batch dimension of size 1.
            x_tst = stn_tst.unsqueeze(0)
            x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
            # perf_counter is monotonic, so the measured duration cannot be
            # skewed by wall-clock adjustments.
            started = time.perf_counter()
            audio = net_g.infer(
                x_tst,
                x_tst_lengths,
                noise_scale=.667,
                noise_scale_w=0.8,
                length_scale=1,
            )[0][0, 0].data.float().numpy()
            elapsed = time.perf_counter() - started
            print("推理时间:", elapsed, "s")
        return audio
    finally:
        mutex.release()


@app.route('/tts')
def text_api():
    """Handle ``GET /tts``: synthesize the ``text`` query parameter.

    Returns:
        The WAV bytes with ``Content-Type: audio/wav`` on success, or a plain
        "server busy" message (the literal below) when another request is
        currently using the model.
    """
    text = request.args.get('text', '')
    audio = tts(text)
    if audio is None:
        # NOTE(review): this busy reply goes out with the default 200 status;
        # kept as-is so existing clients keep working.
        return "服务器忙"

    byte_io = io.BytesIO()
    write(byte_io, SAMPLE_RATE, audio)
    # getvalue() returns the whole buffer regardless of the stream position,
    # so the response does not depend on where write() left the cursor.
    wav_bytes = byte_io.getvalue()
    return wav_bytes, 200, {'Content-Type': 'audio/wav'}


if __name__ == '__main__':
    app.run("0.0.0.0", 8080)