# xVASynth-TTS / server.py
# Repository page header (not part of the program):
# Pendrokar's picture | xVASynth v3 code for English | 19c8b95 | raw | history blame | 26.9 kB
import os
import sys
import traceback
import multiprocessing
# DirectML torch device handle; remains None unless the CPU-only branch further down assigns it.
torch_dml_device = None
# NOTE(review): this copy of the file has lost its indentation. Presumably the statements that
# follow are meant to be nested under this guard -- confirm against the original file.
if __name__ == '__main__':
# Module-level handle for the HTTP server; it is assigned where the server is constructed
# further down, and the /stopServer route calls shutdown() on it.
server = None
# Needed for multiprocessing when the program is frozen into a Windows executable.
multiprocessing.freeze_support()
# Production mode is detected by the presence of xVASynth.exe in the current working directory.
PROD = 'xVASynth.exe' in os.listdir(".")
# Saves me having to do backend re-compilations for every little UI hotfix
with open(f'{"./resources/app" if PROD else "."}/javascript/script.js', encoding="utf8") as f:
lines = f.read().split("\n")
# The version is taken from the second line of script.js: the text between '"v' and the next '"'.
APP_VERSION = lines[1].split('"v')[1].split('"')[0]
# Imports and logger setup
# ========================
# The imports are wrapped so that a failure leaves a crash dump on disk, which is the only
# trace available when no console is attached. Import order is kept exactly as it was.
try:
    import python.pyinstaller_imports
    import numpy
    import logging
    from logging.handlers import RotatingFileHandler
    import json
    from http.server import BaseHTTPRequestHandler, HTTPServer
    from socketserver import ThreadingMixIn
    from python.audio_post import run_audio_post, prepare_input_audio, mp_ffmpeg_output, normalize_audio, start_microphone_recording, move_recorded_file
    import ffmpeg
except Exception:
    # `except Exception` (rather than a bare except) so that SystemExit / KeyboardInterrupt
    # are not swallowed. Execution still continues afterwards, as before.
    import_error_trace = traceback.format_exc()
    print(import_error_trace)
    # Explicit encoding: the traceback may contain non-ASCII file paths, which the platform
    # default encoding might not be able to write.
    with open("./DEBUG_err_imports.txt", "w+", encoding="utf8") as f:
        f.write(import_error_trace)
# Pyinstaller hack
# ================
# torch.jit.script and torch.jit.script_method are replaced with pass-through functions
# before torch itself is imported, so decorated objects are returned unchanged.
# NOTE(review): presumably TorchScript compilation does not work inside the frozen build -- confirm.
try:
    def script_method(fn, _rcb=None):
        """Pass-through replacement for torch.jit.script_method: return `fn` unchanged."""
        return fn

    def script(obj, optimize=True, _frames_up=0, _rcb=None):
        """Pass-through replacement for torch.jit.script: return `obj` unchanged."""
        return obj

    import torch.jit
    torch.jit.script_method = script_method
    torch.jit.script = script
    import torch
    import tqdm
    import regex
except Exception:
    # `except Exception` so that SystemExit / KeyboardInterrupt are not swallowed.
    # The dump is written as UTF-8 because the traceback may contain non-ASCII paths.
    with open("./DEBUG_err_import_torch.txt", "w+", encoding="utf8") as f:
        f.write(traceback.format_exc())
# ================
# True when torch reports no CUDA device; used below for the log banner and later for device selection.
CPU_ONLY = not torch.cuda.is_available()

try:
    # Logger writing everything (DEBUG and up) to a rotating server.log file, and
    # only ERROR and up to the console stream.
    logger = logging.getLogger('serverLog')
    logger.setLevel(logging.DEBUG)
    server_log_path = f'{os.path.dirname(os.path.realpath(__file__))}/{"../../../" if PROD else ""}/server.log'

    # Rotate at 2 MiB, keeping 5 backups.
    fh = RotatingFileHandler(server_log_path, maxBytes=2*1024*1024, backupCount=5)
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.ERROR)

    formatter = logging.Formatter('%(asctime)s - %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    logger.info(f'New session. Version: {APP_VERSION}. Installation: {"CPU" if CPU_ONLY else "CPU+GPU"} | Prod: {PROD} | Log path: {server_log_path}')

    # Keep a reference to the unprefixed info(), so the prefix can be switched off again.
    logger.orig_info = logger.info

    def prefixed_log (msg):
        """Log `msg` at INFO level, preceded by the currently configured prefix."""
        logger.info(f'{logger.logging_prefix}{msg}')

    def set_logger_prefix (prefix=""):
        """Point logger.log at a prefixed INFO logger, or back at plain info() when `prefix` is empty.

        Note that this replaces `logger.log` on this logger instance, so the standard
        Logger.log(level, msg) call signature is not available on it afterwards.
        """
        if len(prefix):
            logger.logging_prefix = f'[{prefix}]: '
            logger.log = prefixed_log
        else:
            logger.log = logger.orig_info

    logger.set_logger_prefix = set_logger_prefix
    logger.set_logger_prefix("")
except Exception:
    # `except Exception` so that SystemExit / KeyboardInterrupt are not swallowed.
    # Capture the traceback once; it is written to disk and, if possible, to the logger.
    logger_error_trace = traceback.format_exc()
    # UTF-8 so that non-ASCII paths in the traceback cannot break the dump itself.
    with open("./DEBUG_err_logger.txt", "w+", encoding="utf8") as f:
        f.write(logger_error_trace)
    try:
        logger.info(logger_error_trace)
    except Exception:
        # Deliberately ignored: the logger may not exist or may be only partly configured.
        pass
if CPU_ONLY:
    # Without CUDA, try to obtain a DirectML device; any failure falls back to the plain cpu device.
    try:
        import torch_directml
        torch_dml_device = torch_directml.device()
        logger.info("Successfully got the torch DirectML device")
    except Exception:
        # I've implemented support for DirectML, but at the time of writing (08/04/2023, v0.1.13.1.dev230301), it's hella broken...
        # Not a single model can successfully .forward() when switching to DirectML device from cpu. I'm leaving in the code however,
        # as I'd still like to add support for it once things are more stable. This try/catch should run ok when it's installed
        torch_dml_device = torch.device("cpu")
        # logger.exception also records the traceback of the failure.
        logger.exception("Failed to get torch DirectML; falling back to cpu device")
# ========================
# Create the plugin manager and report how many plugins are active.
try:
    from python.plugins_manager import PluginManager
    plugin_manager = PluginManager(APP_VERSION, PROD, CPU_ONLY, logger)
    active_plugins = plugin_manager.get_active_plugins_count()
    logger.info(f'Plugin manager loaded. {active_plugins} active plugins.')
except Exception:
    # `except Exception` so that SystemExit / KeyboardInterrupt are not swallowed.
    logger.info("Plugin manager FAILED.")
    logger.info(traceback.format_exc())

# Run the "pre start" plugin hooks.
# NOTE(review): if the construction above failed, `plugin_manager` is undefined here and this
# line raises NameError -- the failure is logged above but not recovered from.
plugin_manager.run_plugins(plist=plugin_manager.plugins["start"]["pre"], event="pre start", data=None)
# ======================== Models manager
# Maps game id -> models directory; filled by the /setAvailableVoices and /checkReady routes.
modelsPaths = {}
try:
    from python.models_manager import ModelsManager
    # The manager starts on the cpu device; the device is changed later through the server routes.
    models_manager = ModelsManager(logger, PROD, device="cpu")
except Exception:
    # `except Exception` so that SystemExit / KeyboardInterrupt are not swallowed.
    logger.info("Models manager failed to initialize")
    logger.info(traceback.format_exc())
# ========================
# Note: "Models ready" is announced unconditionally, including after a logged failure above.
print("Models ready")
logger.info("Models ready")
# Server
class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer combined with ThreadingMixIn, so each request is handled in its own thread."""
class Handler(BaseHTTPRequestHandler):
    """HTTP request handler exposing the synthesis backend as POST routes.

    Every route is matched on ``self.path`` inside :meth:`do_POST`. The JSON request body is
    available to the routes as ``post_data`` and each route's answer is sent back as the
    UTF-8 encoded ``req_response`` string.

    NOTE(review): the copy of the file this class was restored from had lost its
    indentation. Block nesting was reconstructed from the control flow -- confirm the
    nesting of the "extraInfo" sections and of the speech-to-speech branch against the
    original file.
    """

    def _set_response(self):
        """Send a 200 status with a text/html Content-Type header and end the headers."""
        self.send_response(200)
        self.send_header("Content-Type", "text/html")
        self.end_headers()

    def do_GET(self):
        """Answer any GET request with a debug string that echoes the requested path."""
        returnString = "[DEBUG] Get request for {}".format(self.path)
        # Log the text itself; logging the encoded bytes would record a b'...' repr.
        logger.info(returnString)
        self._set_response()
        self.wfile.write(returnString.encode("utf-8"))

    def do_POST(self):
        """Dispatch a POST request on ``self.path`` and reply with ``req_response``.

        The body is read using the Content-Length header and parsed as JSON into
        ``post_data`` (an empty dict when there is no body or no Content-Length header).
        Routes are tested one after another with plain ``if`` statements, which lets
        ``/runSpeechToSpeech`` continue into the ``/outputAudio`` handling once it has
        produced an empty response.

        Any exception raised while handling the request is written to
        ./DEBUG_request.txt, printed and logged; in that case no HTTP response is sent.
        """
        global modelsPaths
        post_data = ""
        try:
            # A missing Content-Length header is treated as an empty body instead of
            # failing on int(None).
            content_length = int(self.headers.get('Content-Length') or 0)
            post_data = json.loads(self.rfile.read(content_length).decode('utf-8')) if content_length else {}
            req_response = "POST request for {}".format(self.path)
            print("POST")
            print(self.path)

            # For headless mode
            if self.path == "/setAvailableVoices":
                modelsPaths = json.loads(post_data["modelsPaths"])

            if self.path == "/getAvailableVoices":
                # Build {gameId: [voice metadata, ...]} from the .json files in each models directory.
                models = {}
                for gameId in modelsPaths.keys():
                    models[gameId] = []
                    modelJSONs = sorted(os.listdir(modelsPaths[gameId]))
                    for fname in modelJSONs:
                        if fname.endswith(".json"):
                            # Read as UTF-8 (like script.js at start-up) rather than the
                            # platform default encoding.
                            with open(f'{modelsPaths[gameId]}/{fname}', "r", encoding="utf8") as f:
                                jsons = f.read()
                            metadata = json.loads(jsons)
                            models[gameId].append({
                                "modelType": metadata["modelType"],
                                "author": metadata["author"] if "author" in metadata else "",
                                "emb_size": metadata["emb_size"] if "emb_size" in metadata else 1,
                                "voiceId": metadata["games"][0]["voiceId"],
                                "voiceName": metadata["games"][0]["voiceName"],
                                "gender": metadata["games"][0]["gender"] if "gender" in metadata["games"][0] else "other",
                                "emb_i": metadata["games"][0]["emb_i"] if "emb_i" in metadata["games"][0] else 0
                            })
                req_response = json.dumps(models)

            if self.path == "/setVocoder":
                logger.info("POST {}".format(self.path))
                logger.info(post_data)
                vocoder = post_data["vocoder"]
                modelPath = post_data["modelPath"]
                hifi_gan = "waveglow" not in vocoder
                if vocoder=="qnd":
                    req_response = models_manager.load_model("hifigan", f'{"./resources/app" if PROD else "."}/python/hifigan/hifi.pt')
                elif not hifi_gan:
                    req_response = models_manager.load_model(vocoder, modelPath)
                # load_model may return None; the reply must be a string.
                req_response = "" if req_response is None else req_response

            if self.path == "/stopServer":
                logger.info("POST {}".format(self.path))
                logger.info("STOPPING SERVER")
                server.shutdown()
                sys.exit()

            if self.path == "/normalizeAudio":
                input_path = post_data["input_path"]
                output_path = post_data["output_path"]
                req_response = normalize_audio(input_path, output_path)

            if self.path == "/customEvent":
                logger.info("POST {}".format(self.path))
                plugin_manager.run_plugins(plist=plugin_manager.plugins["custom-event"], event="custom-event", data=post_data)

            if self.path == "/setDevice":
                logger.info("POST {}".format(self.path))
                logger.info(post_data)
                if post_data["device"] == "cpu":
                    logger.info("Setting torch device to CPU")
                    device = torch.device("cpu")
                elif CPU_ONLY:
                    logger.info("Setting torch device to DirectML")
                    device = torch_dml_device
                else:
                    logger.info("Setting torch device to CUDA")
                    device = torch.device("cuda:0")
                models_manager.set_device(device)

            if self.path == "/loadModel":
                logger.info("POST {}".format(self.path))
                logger.info(post_data)
                ckpt = post_data["model"]
                modelType = post_data["modelType"]
                instance_index = post_data["instance_index"] if "instance_index" in post_data else 0
                # Normalise the model type into the key form used by the models manager.
                modelType = modelType.lower().replace(".", "_").replace(" ", "")
                post_data["pluginsContext"] = json.loads(post_data["pluginsContext"])
                n_speakers = post_data["model_speakers"] if "model_speakers" in post_data else None
                base_lang = post_data["base_lang"] if "base_lang" in post_data else None

                plugin_manager.run_plugins(plist=plugin_manager.plugins["load-model"]["pre"], event="pre load-model", data=post_data)
                models_manager.load_model(modelType, ckpt+".pt", instance_index=instance_index, n_speakers=n_speakers, base_lang=base_lang)
                plugin_manager.run_plugins(plist=plugin_manager.plugins["load-model"]["post"], event="post load-model", data=post_data)

                if modelType=="fastpitch1_1":
                    models_manager.models_bank["fastpitch1_1"][instance_index].init_arpabet_dicts()

            if self.path == "/getG2P":
                text = post_data["text"]
                base_lang = post_data["base_lang"]
                # NOTE(review): the key is spelled "xVAPitch" here but "xvapitch" in the other
                # routes -- confirm that models() treats both the same.
                model = models_manager.models("xVAPitch", instance_index=0)
                returnString = model.getG2P(text, base_lang)
                req_response = returnString

            if self.path == "/synthesizeSimple":
                logger.info("POST {}".format(self.path))
                text = post_data["sequence"]
                instance_index = post_data["instance_index"] if "instance_index" in post_data else 0
                out_path = post_data["outfile"]
                base_lang = post_data["base_lang"] if "base_lang" in post_data else None
                base_emb = post_data["base_emb"] if "base_emb" in post_data else None
                useCleanup = post_data["useCleanup"] if "useCleanup" in post_data else None

                model = models_manager.models("xvapitch", instance_index=instance_index)
                req_response = model.infer(
                    plugin_manager, text, out_path, vocoder=None,
                    speaker_i=None, editor_data=None, pace=None, old_sequence=None,
                    globalAmplitudeModifier=None, base_lang=base_lang, base_emb=base_emb, useSR=False, useCleanup=useCleanup)

            if self.path == "/synthesize":
                logger.info("POST {}".format(self.path))
                post_data["pluginsContext"] = json.loads(post_data["pluginsContext"])
                instance_index = post_data["instance_index"] if "instance_index" in post_data else 0

                # Handle the case where the vocoder remains selected on app start-up, with auto-HiFi turned off, but no setVocoder call is made before synth
                continue_synth = True
                if "waveglow" in post_data["vocoder"]:
                    waveglowPath = post_data["waveglowPath"]
                    req_response = models_manager.load_model(post_data["vocoder"], waveglowPath, instance_index=instance_index)
                    if req_response=="ENOENT":
                        continue_synth = False

                device = post_data["device"] if "device" in post_data else models_manager.device_label
                device = torch.device("cpu") if device=="cpu" else (torch_dml_device if CPU_ONLY else torch.device("cuda:0"))
                models_manager.set_device(device, instance_index=instance_index)

                if continue_synth:
                    plugin_manager.set_context(post_data["pluginsContext"])
                    plugin_manager.run_plugins(plist=plugin_manager.plugins["synth-line"]["pre"], event="pre synth-line", data=post_data)

                    modelType = post_data["modelType"]
                    text = post_data["sequence"]
                    pace = float(post_data["pace"])
                    out_path = post_data["outfile"]
                    base_lang = post_data["base_lang"] if "base_lang" in post_data else None
                    base_emb = post_data["base_emb"] if "base_emb" in post_data else None
                    pitch = post_data["pitch"] if "pitch" in post_data else None
                    energy = post_data["energy"] if "energy" in post_data else None
                    emAngry = post_data["emAngry"] if "emAngry" in post_data else None
                    emHappy = post_data["emHappy"] if "emHappy" in post_data else None
                    emSad = post_data["emSad"] if "emSad" in post_data else None
                    emSurprise = post_data["emSurprise"] if "emSurprise" in post_data else None
                    editorStyles = post_data["editorStyles"] if "editorStyles" in post_data else None
                    duration = post_data["duration"] if "duration" in post_data else None
                    speaker_i = post_data["speaker_i"] if "speaker_i" in post_data else None
                    useSR = post_data["useSR"] if "useSR" in post_data else None
                    useCleanup = post_data["useCleanup"] if "useCleanup" in post_data else None
                    vocoder = post_data["vocoder"]
                    globalAmplitudeModifier = float(post_data["globalAmplitudeModifier"]) if "globalAmplitudeModifier" in post_data else None
                    editor_data = [pitch, duration, energy, emAngry, emHappy, emSad, emSurprise, editorStyles]
                    old_sequence = post_data["old_sequence"] if "old_sequence" in post_data else None

                    model = models_manager.models(modelType.lower().replace(".", "_").replace(" ", ""), instance_index=instance_index)
                    req_response = model.infer(
                        plugin_manager, text, out_path, vocoder=vocoder,
                        speaker_i=speaker_i, editor_data=editor_data, pace=pace, old_sequence=old_sequence,
                        globalAmplitudeModifier=globalAmplitudeModifier, base_lang=base_lang, base_emb=base_emb, useSR=useSR, useCleanup=useCleanup)
                    plugin_manager.run_plugins(plist=plugin_manager.plugins["synth-line"]["post"], event="post synth-line", data=post_data)

            if self.path == "/synthesize_batch":
                post_data["pluginsContext"] = json.loads(post_data["pluginsContext"])
                plugin_manager.set_context(post_data["pluginsContext"])
                plugin_manager.run_plugins(plist=plugin_manager.plugins["batch-synth-line"]["pre"], event="pre batch-synth-line", data=post_data)

                modelType = post_data["modelType"]
                linesBatch = post_data["linesBatch"]
                speaker_i = post_data["speaker_i"]
                vocoder = post_data["vocoder"]
                outputJSON = post_data["outputJSON"]
                useSR = post_data["useSR"]
                useCleanup = post_data["useCleanup"]

                with torch.no_grad():
                    try:
                        model = models_manager.models(modelType.lower().replace(".", "_").replace(" ", ""))
                        req_response = model.infer_batch(plugin_manager, linesBatch, outputJSON=outputJSON, vocoder=vocoder, speaker_i=speaker_i, useSR=useSR, useCleanup=useCleanup)
                    except RuntimeError as e:
                        # Out-of-memory is reported with a short marker string; anything else
                        # is returned to the client as the full traceback.
                        if "CUDA out of memory" in str(e):
                            req_response = "CUDA OOM"
                        else:
                            req_response = traceback.format_exc()
                            logger.info(req_response)
                    except Exception:
                        # Narrowed from a bare except so SystemExit / KeyboardInterrupt propagate.
                        e = traceback.format_exc()
                        if "CUDA out of memory" in str(e):
                            req_response = "CUDA OOM"
                        else:
                            req_response = e
                            logger.info(e)

                post_data["req_response"] = req_response
                plugin_manager.run_plugins(plist=plugin_manager.plugins["batch-synth-line"]["post"], event="post batch-synth-line", data=post_data)

            if self.path == "/runSpeechToSpeech":
                logger.info("POST {}".format(self.path))
                input_path = post_data["input_path"]
                style_emb = post_data["style_emb"]
                options = post_data["options"]
                audio_out_path = post_data["audio_out_path"]
                useSR = post_data["useSR"]
                useCleanup = post_data["useCleanup"]
                vc_strength = post_data["vc_strength"]
                removeNoise = post_data["removeNoise"]
                removeNoiseStrength = post_data["removeNoiseStrength"]

                final_path = prepare_input_audio(PROD, logger, input_path, removeNoise, removeNoiseStrength)
                models_manager.init_model("speaker_rep")
                models_manager.load_model("speaker_rep", f'{"./resources/app" if PROD else "."}/python/xvapitch/speaker_rep/speaker_rep.pt')

                try:
                    out = models_manager.models("xvapitch").run_speech_to_speech(final_path, audio_out_path.replace(".wav", "_tempS2S.wav"), style_emb, models_manager, plugin_manager, vc_strength=vc_strength, useSR=useSR, useCleanup=useCleanup)
                    if out=="TOO_SHORT":
                        req_response = "TOO_SHORT"
                    else:
                        data_out = ""
                        req_response = data_out
                    # For use by /outputAudio
                    # (these values are only consumed when req_response is "", see the
                    # /outputAudio condition below)
                    post_data["input_path"] = audio_out_path.replace(".wav", "_tempS2S.wav")
                    post_data["output_path"] = audio_out_path
                except (ValueError, RuntimeError):
                    req_response = traceback.format_exc()
                    logger.info(req_response)
                except Exception as e:
                    req_response = traceback.format_exc()
                    logger.info(req_response)
                    logger.info(repr(e))

            if self.path == "/batchOutputAudio":
                input_paths = post_data["input_paths"]
                output_paths = post_data["output_paths"]
                processes = post_data["processes"]
                options = json.loads(post_data["options"])

                # For plugins
                extraInfo = {}
                if "extraInfo" in post_data:
                    extraInfo = json.loads(post_data["extraInfo"])
                    extraInfo["pluginsContext"] = json.loads(post_data["pluginsContext"])
                    extraInfo["audio_options"] = options
                    extraInfo["input_paths"] = input_paths
                    extraInfo["output_paths"] = output_paths
                    extraInfo["processes"] = processes
                    extraInfo["ffmpeg"] = ffmpeg

                plugin_manager.run_plugins(plist=plugin_manager.plugins["mp-output-audio"]["pre"], event="pre mp-output-audio", data=extraInfo)
                req_response = mp_ffmpeg_output(PROD, logger, processes, input_paths, output_paths, options)
                plugin_manager.run_plugins(plist=plugin_manager.plugins["mp-output-audio"]["post"], event="post mp-output-audio", data=extraInfo)

            # Also entered after a successful /runSpeechToSpeech (empty response), which has
            # stored its temporary file and final path in post_data above.
            if self.path == "/outputAudio" or (self.path == "/runSpeechToSpeech" and req_response==""):
                isBatchMode = post_data["isBatchMode"]
                if not isBatchMode:
                    logger.info("POST /outputAudio")

                input_path = post_data["input_path"]
                output_path = post_data["output_path"]
                options = json.loads(post_data["options"])

                # For plugins
                extraInfo = {}
                if "extraInfo" in post_data:
                    extraInfo = json.loads(post_data["extraInfo"])
                    extraInfo["pluginsContext"] = json.loads(post_data["pluginsContext"])
                    extraInfo["audio_options"] = options
                    extraInfo["input_path"] = input_path
                    extraInfo["output_path"] = output_path
                    extraInfo["ffmpeg"] = ffmpeg

                plugin_manager.run_plugins(plist=plugin_manager.plugins["output-audio"]["pre"], event="pre output-audio", data=extraInfo)

                # The paths are read from post_data again after the "pre" plugins have run.
                input_path = post_data["input_path"]
                output_path = post_data["output_path"]
                # No logger is passed in batch mode.
                req_response = run_audio_post(PROD, None if isBatchMode else logger, input_path, output_path, options)
                plugin_manager.run_plugins(plist=plugin_manager.plugins["output-audio"]["post"], event="post output-audio", data=extraInfo)

            if self.path == "/refreshPlugins":
                logger.info("POST {}".format(self.path))
                status = plugin_manager.refresh_active_plugins()
                logger.info("status")
                logger.info(status)
                req_response = ",".join(status)

            if self.path == "/getWavV3StyleEmb":
                logger.info("POST {}".format(self.path))
                wav_path = post_data["wav_path"]

                models_manager.init_model("speaker_rep")
                load_resp = models_manager.load_model("speaker_rep", f'{"./resources/app" if PROD else "."}/python/xvapitch/speaker_rep/speaker_rep.pt')
                if load_resp=="ENOENT":
                    req_response = "ENOENT"
                else:
                    style_emb = models_manager.models("speaker_rep").compute_embedding(wav_path).squeeze().cpu().detach().numpy()
                    # The embedding is returned as comma-separated values.
                    req_response = ",".join([str(v) for v in style_emb])

            if self.path == "/computeEmbsAndDimReduction":
                logger.info("POST {}".format(self.path))
                models_manager.init_model("speaker_rep")
                load_resp = models_manager.load_model("speaker_rep", f'{"./resources/app" if PROD else "."}/python/xvapitch/speaker_rep/speaker_rep.pt')
                embs = models_manager.models("speaker_rep").reduce_data_dimension(post_data["mappings"], post_data["includeAllVoices"], post_data["onlyInstalled"], post_data["algorithm"])
                req_response = embs

            if self.path == "/checkReady":
                modelsPaths = json.loads(post_data["modelsPaths"])
                device = torch.device("cpu") if post_data["device"]=="cpu" else (torch_dml_device if CPU_ONLY else torch.device("cuda:0"))
                models_manager.set_device(device)
                req_response = "ready"

            if self.path == "/updateARPABet":
                if "fastpitch1_1" in models_manager.models_bank.keys():
                    fastpitch_bank = models_manager.models_bank["fastpitch1_1"]
                    if hasattr(fastpitch_bank, "refresh_arpabet_dicts"):
                        # Kept for compatibility in case the bank entry is itself a model.
                        fastpitch_bank.refresh_arpabet_dicts()
                    else:
                        # /loadModel indexes this entry by instance_index, so it holds one
                        # model per instance: refresh every loaded instance.
                        fastpitch_instances = fastpitch_bank.values() if isinstance(fastpitch_bank, dict) else fastpitch_bank
                        for fastpitch_model in fastpitch_instances:
                            fastpitch_model.refresh_arpabet_dicts()

            if self.path == "/start_microphone_recording":
                start_microphone_recording(logger, models_manager, f'{"./resources/app" if PROD else "."}')
                req_response = ""

            if self.path == "/move_recorded_file":
                file_path = post_data["file_path"]
                move_recorded_file(PROD, logger, models_manager, f'{"./resources/app" if PROD else "."}', file_path)

            self._set_response()
            self.wfile.write(req_response.encode("utf-8"))
        except Exception as e:
            # Dump the traceback and the request payload for debugging. Written as UTF-8 so
            # that non-ASCII text in the payload cannot make the dump itself fail.
            with open("./DEBUG_request.txt", "w+", encoding="utf8") as f:
                f.write(traceback.format_exc())
                f.write(str(post_data))
            logger.info("Post Error:\n {}".format(repr(e)))
            print(traceback.format_exc())
            logger.info(traceback.format_exc())
# Create the HTTP server on port 8008 (all interfaces). The threaded variant is used so that
# each request is handled in its own thread.
try:
    server = ThreadedHTTPServer(("",8008), Handler)
    # Prevent issues with socket reuse
    # NOTE(review): this is assigned after the constructor has already bound the socket, so
    # it presumably has no effect on that bind -- confirm against the socketserver docs.
    server.allow_reuse_address = True
except Exception:
    # `except Exception` so that SystemExit / KeyboardInterrupt are not swallowed.
    # The dump is written as UTF-8 because the traceback may contain non-ASCII paths.
    with open("./DEBUG_server_error.txt", "w+", encoding="utf8") as f:
        f.write(traceback.format_exc())
    logger.info(traceback.format_exc())
    # Without a server object there is nothing to serve. Previously execution carried on,
    # announced "Server ready" and then crashed calling serve_forever() on None.
    sys.exit(1)

try:
    plugin_manager.run_plugins(plist=plugin_manager.plugins["start"]["post"], event="post start", data=None)
    print("Server ready")
    logger.info("Server ready")
    # Blocks until server.shutdown() is called (see the /stopServer route) or Ctrl+C is pressed.
    server.serve_forever()
except KeyboardInterrupt:
    pass
server.server_close()