# File size: 3,985 Bytes


from typing import Dict, List, Any
from pathlib import Path
import sys
import base64
import io
from PIL import Image
# Put the directory containing this file on sys.path *before* importing the
# packages below. `run`, `preprocess` and `ootd` are presumably project-local
# packages living next to this file (TODO confirm); importing any of them
# before the path is extended fails when the working directory is elsewhere.
PROJECT_ROOT = Path(__file__).absolute().parent
sys.path.insert(0, str(PROJECT_ROOT))

from run.utils_ootd import get_mask_location  # noqa: E402
from preprocess.openpose.run_openpose import OpenPose  # noqa: E402
from preprocess.humanparsing.run_parsing import Parsing  # noqa: E402
from ootd.inference_ootd_hd import OOTDiffusionHD  # noqa: E402
from ootd.inference_ootd_dc import OOTDiffusionDC  # noqa: E402


class EndpointHandler():
    """Request handler that runs a garment try-on pipeline.

    On construction it loads the pose estimator, the human-parsing model and
    both diffusion variants ('hd' and 'dc'). Calling the instance with a
    request payload runs one inference and returns the first generated image.
    """

    # Category index -> name passed to the diffusion model call.
    _MODEL_CATEGORIES = ('upperbody', 'lowerbody', 'dress')
    # Category index -> name passed to get_mask_location.
    _MASK_CATEGORIES = ('upper_body', 'lower_body', 'dresses')
    # Size both input images (and the masks) are resized to before inference.
    _FULL_SIZE = (768, 1024)
    # Size fed to the pose and parsing models.
    _PREPROCESS_SIZE = (384, 512)

    def __init__(self, path=""):
        """Load every model used by the handler.

        Args:
            path: Accepted for interface compatibility; not used here.
        """
        self.gpu_id = 0
        self.openpose_model = OpenPose(self.gpu_id)
        self.parsing_model = Parsing(self.gpu_id)
        self.model_hd = OOTDiffusionHD(self.gpu_id)
        self.model_dc = OOTDiffusionDC(self.gpu_id)

    @classmethod
    def _resolve_category(cls, category) -> int:
        """Return the category index (0, 1 or 2) for an index or a category name.

        Raises:
            ValueError: If ``category`` is neither a valid index nor a known name.
        """
        if isinstance(category, str):
            for names in (cls._MODEL_CATEGORIES, cls._MASK_CATEGORIES):
                if category in names:
                    return names.index(category)
        elif isinstance(category, int) and 0 <= category < len(cls._MODEL_CATEGORIES):
            return category
        raise ValueError(
            f"category must be an index in 0..{len(cls._MODEL_CATEGORIES) - 1} "
            f"or one of {cls._MODEL_CATEGORIES + cls._MASK_CATEGORIES}, "
            f"got {category!r}!"
        )

    @classmethod
    def _decode_image(cls, encoded, field: str):
        """Decode a base64 payload into a PIL image resized to ``_FULL_SIZE``.

        Raises:
            ValueError: If the field is missing or empty.
        """
        if not encoded:
            raise ValueError(f"Missing required input '{field}' (base64-encoded image)!")
        raw = base64.b64decode(encoded)
        return Image.open(io.BytesIO(raw)).resize(cls._FULL_SIZE)

    @staticmethod
    def _encode_image(image) -> str:
        """Serialize a PIL image to a base64-encoded PNG string."""
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        return base64.b64encode(buffer.getvalue()).decode("ascii")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run one try-on inference for a request payload.

        Args:
            data: Request dictionary. The parameters are read from
                ``data["inputs"]`` when that key exists, otherwise from
                ``data`` itself. Recognised keys:
                - cloth_image: Base64-encoded garment image (required).
                - model_image: Base64-encoded image of the person (required).
                - model_type: 'hd' or 'dc' (default 'hd').
                - category: 0/1/2 or the matching name, e.g. 'upperbody',
                  'lowerbody', 'dress' (default 0). 'hd' only accepts 0.
                - scale: Forwarded as ``image_scale`` (default 2.0).
                - step: Forwarded as ``num_steps`` (default 20).
                - sample: Forwarded as ``num_samples`` (default 4).
                - seed: Forwarded as ``seed`` (default -1).

        Returns:
            A one-element list ``[{"image": ...}]`` holding the first image
            produced by the model, as a base64-encoded PNG string when the
            model yields a PIL image. Only the first result is returned even
            when ``sample`` is greater than 1.

        Raises:
            ValueError: On an unknown ``model_type``, an invalid ``category``,
                a category other than 0 with 'hd', or a missing image.
        """
        # Read without mutating the caller's dictionary.
        inputs = data.get("inputs", data)
        model_type = inputs.get("model_type", 'hd')
        scale = inputs.get("scale", 2.0)
        step = inputs.get("step", 20)
        sample = inputs.get("sample", 4)
        seed = inputs.get("seed", -1)

        # Validate the cheap parameters before any image or model work.
        if model_type not in ('hd', 'dc'):
            raise ValueError("model_type must be 'hd' or 'dc'!")
        category = self._resolve_category(inputs.get("category", 0))
        if model_type == 'hd' and category != 0:
            raise ValueError("model_type 'hd' requires category == 0 (upperbody)!")

        cloth_img = self._decode_image(inputs.get("cloth_image", ""), "cloth_image")
        model_img = self._decode_image(inputs.get("model_image", ""), "model_image")

        # Local selection: no per-request state is stored on the handler.
        model = self.model_hd if model_type == 'hd' else self.model_dc

        small_img = model_img.resize(self._PREPROCESS_SIZE)
        keypoints = self.openpose_model(small_img)
        model_parse, _ = self.parsing_model(small_img)

        mask, mask_gray = get_mask_location(
            model_type, self._MASK_CATEGORIES[category], model_parse, keypoints
        )
        # The masks come from the downscaled image; bring them back to full size.
        mask = mask.resize(self._FULL_SIZE, Image.NEAREST)
        mask_gray = mask_gray.resize(self._FULL_SIZE, Image.NEAREST)

        masked_vton_img = Image.composite(mask_gray, model_img, mask)
        images = model(
            model_type=model_type,
            category=self._MODEL_CATEGORIES[category],
            image_garm=cloth_img,
            image_vton=masked_vton_img,
            mask=mask,
            image_ori=model_img,
            num_samples=sample,
            num_steps=step,
            image_scale=scale,
            seed=seed,
        )

        result = images[0]
        # A raw PIL image is not JSON-serializable; encode it. Anything else
        # (e.g. an already-encoded string) is passed through untouched.
        if isinstance(result, Image.Image):
            result = self._encode_image(result)

        return [{"image": result}]