import io
import logging

import librosa
import soundfile
from flask import Flask, request, send_file
from flask_cors import CORS

# Disabled imports, kept as they appeared in the original file.
# from infer_tools.infer_tool import Svc
from inference_vst import SvcFish
# from utils.hparams import hparams

app = Flask(__name__)
CORS(app)

# Only warnings and above from numba are logged.
logging.getLogger('numba').setLevel(logging.WARNING)


@app.route("/voiceChangeModel", methods=["POST"])
def voice_change_model():
    """Convert an uploaded audio sample and return it as a WAV attachment.

    Reads the following from the multipart POST request:

    * file ``sample``: the audio to convert (required).
    * form ``fPitchChange``: pitch change, parsed as float (default 0).
    * form ``sSpeakId``: speaker id, parsed as int (default 0).
    * form ``sampleRate``: sample rate requested by the DAW; parsed as a
      float and truncated to int. Must be positive.

    The audio is passed to the module-level ``svc_model`` (created only when
    this file is run as a script), resampled from the model's sample rate to
    the requested rate, and written to an in-memory WAV file.

    Returns:
        The WAV data as an attachment named ``temp.wav``, or a plain-text
        400 response when the upload is missing or a form field is invalid.
    """
    request_form = request.form

    wave_file = request.files.get("sample", None)
    if wave_file is None:
        # Without this guard the handler would fail on ``None.read()``.
        return "Missing 'sample' file in request.", 400

    try:
        # Pitch change information.
        f_pitch_change = float(request_form.get("fPitchChange", 0))
        # Speaker id.
        int_speak_Id = int(request_form.get("sSpeakId", 0))
        # Sample rate required by the DAW.
        daw_sample = int(float(request_form.get("sampleRate", 0)))
    except (TypeError, ValueError, OverflowError):
        return "Invalid numeric form field.", 400

    if daw_sample <= 0:
        # A zero/negative rate cannot be used for resampling or WAV output.
        return "'sampleRate' must be a positive number.", 400

    # Wrap the uploaded bytes in a file-like object for the model.
    input_wav_path = io.BytesIO(wave_file.read())

    # Model inference.
    _audio, _model_sr = svc_model.infer(
        input_wav_path, f_pitch_change, int_speak_Id, daw_sample
    )
    # Sample rates are passed by keyword: recent librosa releases reject
    # positional ``orig_sr`` / ``target_sr``.
    tar_audio = librosa.resample(
        _audio, orig_sr=_model_sr, target_sr=daw_sample
    )

    # Return the audio.
    out_wav_path = io.BytesIO()
    soundfile.write(out_wav_path, tar_audio, daw_sample, format="wav")
    out_wav_path.seek(0)
    return send_file(out_wav_path, download_name="temp.wav", as_attachment=True)


if __name__ == '__main__':
    # With fish, only the following parameters need to be passed in.
    checkpoint_path = 'logs/DiffSVC/version_0/checkpoints/epoch=123-step=300000-valid_loss=0.17.ckpt'
    config_path = 'configs/svc_cn_hubert_soft_ms.py'
    # Speed-up factor; None means use the value from the config file.
    sampler_interval = None
    # Whether to extract vocals, whether to merge non-vocals, and the
    # vocal loudness gain.
    extract_vocals = True
    merge_non_vocals = False
    vocals_loudness_gain = 0.0
    # Maximum slice duration.
    max_slice_duration = 30.0
    # Silence threshold.
    silence_threshold = 60

    # Assigned at module level so ``voice_change_model`` can reach it.
    svc_model = SvcFish(
        checkpoint_path,
        config_path,
        sampler_interval=sampler_interval,
        extract_vocals=extract_vocals,
        merge_non_vocals=merge_non_vocals,
        vocals_loudness_gain=vocals_loudness_gain,
        silence_threshold=silence_threshold,
        max_slice_duration=max_slice_duration,
    )

    # These settings correspond to the VST plugin; changing them is not
    # recommended. Note that host "0.0.0.0" listens on all interfaces.
    app.run(port=6842, host="0.0.0.0", debug=False, threaded=False)