# Source: mocking-bird / control / mkgui / preprocess.py
# Uploaded by smf2010 ("Upload 204 files", commit a23d717, verified).
from pydantic import BaseModel, Field
import os
from pathlib import Path
from enum import Enum
from typing import Any, Tuple
# Constants: checkpoint folders, given relative to the current working directory.
EXT_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}ppg_extractor"
ENC_MODELS_DIRT = f"data{os.sep}ckpt{os.sep}encoder"


def _load_model_enum(enum_name: str, models_dir: str, label: str) -> "type[Enum]":
    """Build an Enum with one member per ``*.pt`` file found under *models_dir*.

    Each member is named after the checkpoint's file name and its value is the
    ``Path`` of that file. The folder is searched recursively.

    Args:
        enum_name: Name given to the generated Enum class.
        models_dir: Folder that is searched for ``*.pt`` checkpoints.
        label: Word used in the "Loaded ... models" console message.

    Returns:
        The generated Enum class (empty when no checkpoint is found).

    Raises:
        FileNotFoundError: If *models_dir* is not an existing directory.
    """
    if not os.path.isdir(models_dir):
        raise FileNotFoundError(f"Model folder {models_dir} doesn't exist.")

    # glob() yields entries in filesystem order; sorting keeps the member
    # order (and therefore any choice list built from it) reproducible.
    checkpoints = sorted(Path(models_dir).glob("**/*.pt"))

    # NOTE: members are keyed by bare file name, so two checkpoints with the
    # same name in different sub-folders would clash as enum member names.
    model_enum = Enum(
        enum_name,
        [(checkpoint.name, checkpoint) for checkpoint in checkpoints],
        module=__name__,
    )
    print(f"Loaded {label} models: {len(model_enum)}")
    return model_enum


# Both enums are built at import time; a missing folder aborts the import.
extractors = _load_model_enum("extractors", EXT_MODELS_DIRT, "extractor")
encoders = _load_model_enum("encoders", ENC_MODELS_DIRT, "encoders")
class Model(str, Enum):
    # Target models that can be preprocessed for. Members are also plain
    # strings (str mixin), so they compare equal to their text value.
    # Described with a comment instead of a docstring so that schemas
    # generated from this enum keep their current description.
    VC_PPG2MEL = "ppg2mel"
class Dataset(str, Enum):
    # Dataset identifiers accepted by the preprocessing form. Members are
    # also plain strings (str mixin), so they compare equal to their text.
    # Described with a comment instead of a docstring so that schemas
    # generated from this enum keep their current description.
    AIDATATANG_200ZH = "aidatatang_200zh"
    AIDATATANG_200ZH_S = "aidatatang_200zh_s"
class Input(BaseModel):
# def render_input_ui(st, input) -> Dict:
# input["selected_dataset"] = st.selectbox(
# '้€‰ๆ‹ฉๆ•ฐๆฎ้›†',
# ("aidatatang_200zh", "aidatatang_200zh_s")
# )
# return input
model: Model = Field(
Model.VC_PPG2MEL, title="็›ฎๆ ‡ๆจกๅž‹",
)
dataset: Dataset = Field(
Dataset.AIDATATANG_200ZH, title="ๆ•ฐๆฎ้›†้€‰ๆ‹ฉ",
)
datasets_root: str = Field(
..., alias="ๆ•ฐๆฎ้›†ๆ น็›ฎๅฝ•", description="่พ“ๅ…ฅๆ•ฐๆฎ้›†ๆ น็›ฎๅฝ•๏ผˆ็›ธๅฏน/็ปๅฏน๏ผ‰",
format=True,
example="..\\trainning_data\\"
)
output_root: str = Field(
..., alias="่พ“ๅ‡บๆ น็›ฎๅฝ•", description="่พ“ๅ‡บ็ป“ๆžœๆ น็›ฎๅฝ•๏ผˆ็›ธๅฏน/็ปๅฏน๏ผ‰",
format=True,
example="..\\trainning_data\\"
)
n_processes: int = Field(
2, alias="ๅค„็†็บฟ็จ‹ๆ•ฐ", description="ๆ นๆฎCPU็บฟ็จ‹ๆ•ฐๆฅ่ฎพ็ฝฎ",
le=32, ge=1
)
extractor: extractors = Field(
..., alias="็‰นๅพๆๅ–ๆจกๅž‹",
description="้€‰ๆ‹ฉPPG็‰นๅพๆๅ–ๆจกๅž‹ๆ–‡ไปถ."
)
encoder: encoders = Field(
..., alias="่ฏญ้Ÿณ็ผ–็ ๆจกๅž‹",
description="้€‰ๆ‹ฉ่ฏญ้Ÿณ็ผ–็ ๆจกๅž‹ๆ–‡ไปถ."
)
class AudioEntity(BaseModel):
    # Pairs a raw byte payload with an untyped `mel` value.
    # NOTE(review): presumably encoded audio plus its mel spectrogram —
    # nothing in this file constructs or reads it, so confirm with callers.
    # Kept as comments (not a docstring) so the pydantic schema is unchanged.
    content: bytes  # raw bytes
    mel: Any  # no validation is applied to this field
class Output(BaseModel):
    # Result of a preprocessing run, stored as a custom-root pair:
    # (dataset name, value returned by the preprocessing routine).
    # Kept as comments (not a docstring) so the pydantic schema is unchanged.
    __root__: Tuple[str, int]

    def render_output_ui(self, streamlit_app, input) -> None:  # type: ignore
        """Show a one-line completion summary via ``streamlit_app.subheader``.

        When this method is implemented, it is used instead of the default
        Output UI renderer. The ``input`` argument is accepted but not used.
        """
        dataset_name, processed_count = self.__root__
        summary = f"Dataset {dataset_name} done processed total of {processed_count}"
        streamlit_app.subheader(summary)
def preprocess(input: Input) -> Output:
    """Preprocess a dataset for the selected target model.

    Args:
        input: Form values (model, dataset, directories, worker count and the
            chosen extractor / encoder checkpoints).

    Returns:
        An ``Output`` wrapping ``(dataset, finished)``, where ``finished`` is
        whatever ``preprocess_dataset`` returned, or 0 when the selected model
        is not handled here.
    """
    # Guard: only the PPG2MEL voice-conversion model has a preprocessing path.
    if input.model != Model.VC_PPG2MEL:
        return Output(__root__=(input.dataset, 0))

    # Imported lazily, only when this model is actually selected.
    from models.ppg2mel.preprocess import preprocess_dataset

    # Enum values of the checkpoint fields are the checkpoint file paths.
    extractor_path = Path(input.extractor.value)
    encoder_path = Path(input.encoder.value)

    processed = preprocess_dataset(
        datasets_root=Path(input.datasets_root),
        dataset=input.dataset,
        out_dir=Path(input.output_root),
        n_processes=input.n_processes,
        ppg_encoder_model_fpath=extractor_path,
        speaker_encoder_model=encoder_path,
    )
    # TODO: pass useful return code
    return Output(__root__=(input.dataset, processed))