from enum import Enum
from typing import Any

import torch
from pydantic import BaseModel
from pydub import AudioSegment

from modules.speaker import Speaker, speaker_mgr
from modules.data import styles_mgr
from modules.ssml import merge_prompt


class ParamsTypeError(Exception):
    """Raised when a request parameter has an unacceptable type or form."""


class AudioFormat(str, Enum):
    """Audio container formats named by this module."""

    mp3 = "mp3"
    wav = "wav"


class BaseResponse(BaseModel):
    """Generic response envelope: a message plus an arbitrary payload."""

    # Human-readable status or description.
    message: str
    # Arbitrary payload; tensors and speakers are converted by the encoders
    # registered in Config below.
    data: Any

    class Config:
        # Custom JSON encoders for types pydantic cannot serialize natively.
        json_encoders = {
            torch.Tensor: lambda v: v.tolist(),
            Speaker: lambda v: v.to_json(),
        }


def wav_to_mp3(wav_data, bitrate="48k"):
    """Load WAV data with pydub and export it as MP3.

    Args:
        wav_data: Whatever ``AudioSegment.from_wav`` accepts as its source.
        bitrate: Bitrate string forwarded to ``AudioSegment.export``.

    Returns:
        The object returned by ``AudioSegment.export(format="mp3", ...)``.
    """
    audio = AudioSegment.from_wav(wav_data)
    return audio.export(format="mp3", bitrate=bitrate)


def to_number(value, t, default=0):
    """Convert *value* with the callable *t*, falling back to *default*.

    Args:
        value: The raw value to convert.
        t: Conversion callable such as ``int`` or ``float``.
        default: Returned when *t* raises ``ValueError`` or ``TypeError``.

    Returns:
        ``t(value)`` on success, otherwise *default*.
    """
    try:
        return t(value)
    except (ValueError, TypeError):
        return default


def calc_spk_style(spk: str | int, style: str | int) -> dict[str, Any]:
    """Resolve speaker and style inputs into a dict of voice attributes.

    The speaker is taken from *spk* (an int, a decimal string, or a name
    looked up through ``speaker_mgr``). Parameters found for the named
    *style* are then merged in and take precedence over the speaker-derived
    values.

    Args:
        spk: Speaker as an int, a decimal string, or a speaker name. A name
            that ``speaker_mgr.get_speaker`` does not resolve is ignored.
        style: Style name. An empty string means "no style".

    Returns:
        A dict holding only the non-``None`` entries among ``spk``, ``seed``,
        ``temperature``, ``prefix``, ``prompt1`` and ``prompt2``, plus any
        other keys present in the attribute dict after ``merge_prompt``.

    Raises:
        ParamsTypeError: If *style* is an int, a float, or an all-digit
            string.
    """
    voice_attrs: dict[str, Any] = {}
    params: dict[str, Any] = {}

    # Exact type checks are deliberate: a bool must not be treated as an int.
    if type(spk) is int:
        voice_attrs["spk"] = spk
    elif type(spk) is str:
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²" that int() rejects with ValueError.
        if spk.isdecimal():
            voice_attrs["spk"] = int(spk)
        else:
            speaker = speaker_mgr.get_speaker(spk)
            if speaker:
                voice_attrs["spk"] = speaker

    if type(style) is int or type(style) is float:
        raise ParamsTypeError("The style parameter cannot be a number.")
    if type(style) is str and style != "":
        if style.isdigit():
            raise ParamsTypeError("The style parameter cannot be a number.")
        params.update(styles_mgr.find_params_by_name(style).items())

    # The return value is ignored, so any effect of merge_prompt must come
    # from mutating its arguments in place.
    # NOTE(review): confirm against modules.ssml.
    merge_prompt(voice_attrs, params)

    # Style parameters override speaker-derived attributes. Note that the
    # style side names the temperature key "temp". The order is kept so the
    # key order of the returned dict does not change.
    overrides = (
        ("spk", "spk"),
        ("seed", "seed"),
        ("temperature", "temp"),
        ("prefix", "prefix"),
        ("prompt1", "prompt1"),
        ("prompt2", "prompt2"),
    )
    for attr_key, param_key in overrides:
        voice_attrs[attr_key] = params.get(param_key, voice_attrs.get(attr_key))

    # Map the symbolic temperature values onto numbers.
    temperature = voice_attrs["temperature"]
    if temperature == "min":
        # ref: https://github.com/2noise/ChatTTS/issues/123#issue-2326908144
        voice_attrs["temperature"] = 1e-12
    elif temperature == "max":
        voice_attrs["temperature"] = 1

    return {k: v for k, v in voice_attrs.items() if v is not None}