File size: 3,985 Bytes
a136106 07898e1 a136106 198364e a136106 198364e 07929c9 198364e a136106 08ff18c a136106 98dcc3d 954fefd a136106 98dcc3d 198364e 08ff18c a136106 198364e a136106 198364e a136106 198364e a136106 198364e a136106 4242c93 a136106 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
from typing import Dict, List, Any
from pathlib import Path
import sys
import base64
import io
from PIL import Image
from run.utils_ootd import get_mask_location
# Directory that contains this file. It is pushed to the front of the import
# search path before the `preprocess` / `ootd` imports that follow.
PROJECT_ROOT = Path(__file__).absolute().parent
sys.path.insert(0, str(PROJECT_ROOT))
from preprocess.openpose.run_openpose import OpenPose
from preprocess.humanparsing.run_parsing import Parsing
from ootd.inference_ootd_hd import OOTDiffusionHD
from ootd.inference_ootd_dc import OOTDiffusionDC
class EndpointHandler():
    """Request handler that runs an OOTDiffusion try-on pipeline.

    The handler owns one OpenPose model, one human-parsing model and both the
    'hd' and 'dc' diffusion models; each request selects one diffusion model
    through its ``model_type`` field.
    """

    # Category index -> name handed to the diffusion model.
    CATEGORY_NAMES = ('upperbody', 'lowerbody', 'dress')
    # Category index -> name handed to ``get_mask_location``.
    CATEGORY_NAMES_UTILS = ('upper_body', 'lower_body', 'dresses')
    # Size every input image and the mask are resized to before inference.
    IMAGE_SIZE = (768, 1024)
    # Size of the person image fed to the pose and parsing models.
    PARSE_SIZE = (384, 512)
    # Accepted values of the ``model_type`` request field.
    MODEL_TYPES = ('hd', 'dc')

    def __init__(self, path=""):
        """
        Load the pose, parsing and diffusion models on GPU 0.

        Args:
            path (str): Accepted for interface compatibility; not used here.
        """
        self.gpu_id = 0
        self.openpose_model = OpenPose(self.gpu_id)
        self.parsing_model = Parsing(self.gpu_id)
        self.model_hd = OOTDiffusionHD(self.gpu_id)
        self.model_dc = OOTDiffusionDC(self.gpu_id)

    @classmethod
    def _decode_image(cls, inputs, key):
        """Decode the base64 image stored under ``key`` and resize it to IMAGE_SIZE."""
        encoded = inputs.get(key, "")
        if not encoded:
            raise ValueError(
                f"'{key}' is required and must be a base64-encoded image!"
            )
        raw = base64.b64decode(encoded)
        return Image.open(io.BytesIO(raw)).resize(cls.IMAGE_SIZE)

    @staticmethod
    def _encode_image(image):
        """Return ``image`` as a base64 PNG string when it is image-like.

        Anything that does not expose a ``save`` method (for example a value
        that is already a string) is returned unchanged.
        """
        if isinstance(image, str) or not hasattr(image, "save"):
            return image
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        return base64.b64encode(buffer.getvalue()).decode("ascii")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Process one try-on request and return the generated image.

        Args:
            data (Dict[str, Any]): Request payload. The fields are read from
                ``data["inputs"]`` when that key exists (it is popped from
                ``data``), otherwise from ``data`` itself:
                - cloth_image (str): Base64-encoded garment image. Required.
                - model_image (str): Base64-encoded person image. Required.
                - model_type (str, optional): 'hd' or 'dc'. Default 'hd'.
                - category (int, optional): 0 = upperbody, 1 = lowerbody,
                  2 = dress. Default 0. Must be 0 when model_type is 'hd'.
                - scale (float, optional): Passed as ``image_scale``. Default 2.0.
                - step (int, optional): Passed as ``num_steps``. Default 20.
                - sample (int, optional): Passed as ``num_samples``. Default 4.
                - seed (int, optional): Passed as ``seed``. Default -1.

        Returns:
            List[Dict[str, Any]]: A single-element list ``[{"image": ...}]``
            holding the first generated sample, base64-encoded as PNG when the
            model returns an image object.

        Raises:
            ValueError: If ``model_type`` or ``category`` is invalid, if the
                combination is unsupported, or if an input image is missing.
        """
        inputs = data.pop("inputs", data)

        model_type = inputs.get("model_type", 'hd')
        category = inputs.get("category", 0)
        scale = inputs.get("scale", 2.0)
        step = inputs.get("step", 20)
        sample = inputs.get("sample", 4)
        seed = inputs.get("seed", -1)

        # Validate the cheap scalar fields first so a bad request fails before
        # any image decoding or model inference happens.
        if model_type not in self.MODEL_TYPES:
            raise ValueError("model_type must be 'hd' or 'dc'!")
        # Reject negative indices explicitly: list indexing would otherwise
        # silently map e.g. -1 to the last category.
        if not isinstance(category, int) or not 0 <= category < len(self.CATEGORY_NAMES):
            raise ValueError(
                f"category must be an integer between 0 and "
                f"{len(self.CATEGORY_NAMES) - 1} "
                f"(0=upperbody, 1=lowerbody, 2=dress), got {category!r}!"
            )
        if model_type == 'hd' and category != 0:
            raise ValueError("model_type 'hd' requires category == 0 (upperbody)!")

        cloth_img = self._decode_image(inputs, "cloth_image")
        model_img = self._decode_image(inputs, "model_image")

        # Kept as an instance attribute because the original handler exposed it.
        self.model = self.model_hd if model_type == 'hd' else self.model_dc

        keypoints = self.openpose_model(model_img.resize(self.PARSE_SIZE))
        model_parse, _ = self.parsing_model(model_img.resize(self.PARSE_SIZE))

        mask, mask_gray = get_mask_location(
            model_type,
            self.CATEGORY_NAMES_UTILS[category],
            model_parse,
            keypoints,
        )
        # The mask is computed at PARSE_SIZE; scale it back up to IMAGE_SIZE.
        mask = mask.resize(self.IMAGE_SIZE, Image.NEAREST)
        mask_gray = mask_gray.resize(self.IMAGE_SIZE, Image.NEAREST)

        # Paint the gray mask over the person image wherever the mask is set.
        masked_vton_img = Image.composite(mask_gray, model_img, mask)

        images = self.model(
            model_type=model_type,
            category=self.CATEGORY_NAMES[category],
            image_garm=cloth_img,
            image_vton=masked_vton_img,
            mask=mask,
            image_ori=model_img,
            num_samples=sample,
            num_steps=step,
            image_scale=scale,
            seed=seed,
        )

        # Only the first sample is returned, matching the established output
        # shape. NOTE(review): the model presumably returns PIL images; they
        # are encoded here so the payload matches the documented base64 form.
        return [{"image": self._encode_image(images[0])}]
|