Spaces:

mukeshpandey2628
/

GT_VTR3_1

Runtime error

App Files Files Community

Ubuntu committed on Jul 23, 2024

Commit

6e6426e

1 Parent(s): 3bc69b8

fixed issue with model with dress

Browse files

Files changed (12) hide show

.gitignore +15 -0
app.py +62 -14
celery_worker.py +63 -0
ootd/inference_ootd_dc.py +6 -0
ootd/pipelines_ootd/pipeline_ootd.py +88 -77
preprocess/humanparsing/parsing_api.py +70 -0
preprocess/openpose/run_openpose.py +2 -2
requirements.txt +3 -1
run/cloths_db.py +2 -2
run/gradio_ootd.py +202 -82
run/testing.py +167 -0
run/utils_ootd.py +6 -1

.gitignore CHANGED Viewed

@@ -19,3 +19,18 @@ run/examples/model/male/male_side.png
 run/examples/model/male/male_small_38.png
 run/examples/model/male/male_small.png
 run/examples/model/male/male_xl_45.png

 run/examples/model/male/male_small_38.png
 run/examples/model/male/male_small.png
 run/examples/model/male/male_xl_45.png
+__pycache__/app.cpython-310.pyc
+__pycache__/app2.cpython-310.pyc
+__pycache__/celery_worker.cpython-310.pyc
+ootd/pipelines_ootd/__pycache__/pipeline_ootd.cpython-310.pyc
+ootd/pipelines_ootd/__pycache__/pipeline_ootd.cpython-310.pyc
+ootd/pipelines_ootd/__pycache__/pipeline_ootd.cpython-310.pyc
+run/examples/garment/male_tshirt1.png
+colored_parsing.png
+gram_img.png
+vton_img.png
+temp_images/garm_input.png
+temp_images/vton_input.png
+ootd/pipelines_ootd/__pycache__/pipeline_ootd.cpython-310.pyc
+preprocess/humanparsing/datasets/__pycache__/simple_extractor_dataset.cpython-310.pyc

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ from flask_cors import CORS
 import logging
 import gc
 import os
 from io import BytesIO
 from pathlib import Path
 import sys
@@ -19,6 +21,7 @@ from ootd.inference_ootd_dc import OOTDiffusionDC
 PROJECT_ROOT = Path(__file__).absolute().parents[1].absolute()
 sys.path.insert(0, str(PROJECT_ROOT))
 #run python garbage collector and nvidia cuda clear memory
@@ -29,6 +32,8 @@ torch.cuda.empty_cache()
 # Setup Flask server
 app = Flask(__name__)
 CORS(app, origins="*")  # Enable CORS for the entire app
 logger = logging.getLogger()
@@ -58,15 +63,32 @@ if not os.path.exists(UPLOAD_FOLDER):
     os.makedirs(UPLOAD_FOLDER)
 def process_dc(vton_img, garm_img, category):
     model_type = 'dc'
-    # if category == 'Upper-body':
-    #     category = 0
-    # elif category == 'Lower-body':
-    #     category = 1
-    # else:
-    #     category = 2
     with torch.no_grad():
         # openpose_model.preprocessor.body_estimation.model.to('cuda')
@@ -74,8 +96,8 @@ def process_dc(vton_img, garm_img, category):
         # ootd_model_dc.image_encoder.to('cuda')
         # ootd_model_dc.text_encoder.to('cuda')
-        garm_img = Image.open(garm_img).convert('RGB').resize((768, 1024))
-        vton_img = Image.open(vton_img).convert('RGB').resize((768, 1024))
         keypoints = openpose_model(vton_img.resize((384, 512)))
         print(len(keypoints["pose_keypoints_2d"]))
@@ -134,6 +156,19 @@ def process_dc(vton_img, garm_img, category):
         print(f'category is {category}')
         images = ootd_model_dc(
             model_type=model_type,
             category=category_dict[category],
@@ -141,12 +176,15 @@ def process_dc(vton_img, garm_img, category):
             image_vton=masked_vton_img,
             mask=mask,
             image_ori=vton_img,
-            num_samples=1,
             num_steps=10,
-            image_scale=  1.0,
-            seed=-1,
         )
     return images
@@ -161,7 +199,9 @@ def root():
         response_data = {"message": "Internal server Error"}
         return jsonify(response_data), 500
 #write Flask api name "generate" with POST method that will input 2 images and return 1 image
 @app.route('/generate', methods=['POST'])
@@ -193,9 +233,14 @@ def generate():
     #     category = 2
     try:
         garm_img = request.files['garm_img']
         vton_img = request.files['vton_img']
-        category = 0  # Default to Upper-body if not specified
         # Save the uploaded files
         garm_path = os.path.join(UPLOAD_FOLDER, 'garm_input.png')
@@ -222,6 +267,9 @@ def generate():
         output_image.save(img_byte_arr, format='PNG')
         img_byte_arr = img_byte_arr.getvalue()
         return Response(img_byte_arr, mimetype='image/png')
     except Exception as e:
@@ -239,4 +287,4 @@ if __name__ == '__main__':
-# nohup gunicorn -b 0.0.0.0:5003 sentiment_api:app &

 import logging
 import gc
 import os
+from threading import Thread
+from flask_sse import sse
 from io import BytesIO
 from pathlib import Path
 import sys
 PROJECT_ROOT = Path(__file__).absolute().parents[1].absolute()
 sys.path.insert(0, str(PROJECT_ROOT))
+from queue import Queue
 #run python garbage collector and nvidia cuda clear memory
 # Setup Flask server
 app = Flask(__name__)
 CORS(app, origins="*")  # Enable CORS for the entire app
+app.config["REDIS_URL"] = "redis://localhost:6379"
+app.register_blueprint(sse, url_prefix='/stream')
 logger = logging.getLogger()
     os.makedirs(UPLOAD_FOLDER)
+# progress_queue = Queue()
+# def progress_callback(step, total_steps):
+#     if total_steps is not None and total_steps > 0:
+#         progress = int((step + 1) / total_steps * 100)
+#         progress_queue.put(progress)
+#     else:
+#         progress_queue.put(step + 1)
+def progress_callback(step, total_steps):
+    if total_steps is not None and total_steps > 0:
+        progress = int((step + 1) / total_steps * 100)
+        sse.publish({"progress": progress}, type='progress')
+    else:
+        sse.publish({"step": step + 1}, type='progress')
 def process_dc(vton_img, garm_img, category):
     model_type = 'dc'
+    if category == 'Upper-body':
+        category = 0
+    elif category == 'Lower-body':
+        category = 1
+    else:
+        category = 2
     with torch.no_grad():
         # openpose_model.preprocessor.body_estimation.model.to('cuda')
         # ootd_model_dc.image_encoder.to('cuda')
         # ootd_model_dc.text_encoder.to('cuda')
+        garm_img = Image.open(garm_img).resize((768, 1024))
+        vton_img = Image.open(vton_img).resize((768, 1024))
         keypoints = openpose_model(vton_img.resize((384, 512)))
         print(len(keypoints["pose_keypoints_2d"]))
         print(f'category is {category}')
+        # images = ootd_model_dc(
+        #     model_type=model_type,
+        #     category=category_dict[category],
+        #     image_garm=garm_img,
+        #     image_vton=masked_vton_img,
+        #     mask=mask,
+        #     image_ori=vton_img,
+        #     num_samples=3,
+        #     num_steps=20,
+        #     image_scale=  2.0,
+        #     seed=-1,
+        # )
         images = ootd_model_dc(
             model_type=model_type,
             category=category_dict[category],
             image_vton=masked_vton_img,
             mask=mask,
             image_ori=vton_img,
+            num_samples=2,
             num_steps=10,
+            image_scale=2.0,
+            seed=42,
+            progress_callback=progress_callback,
+            progress_interval=1,  # Update progress every step
         )
     return images
         response_data = {"message": "Internal server Error"}
         return jsonify(response_data), 500
+@app.route('/stream')
+def stream():
+    return Response(sse.stream(), content_type='text/event-stream')
 #write Flask api name "generate" with POST method that will input 2 images and return 1 image
 @app.route('/generate', methods=['POST'])
     #     category = 2
     try:
+        cloths_type = ["Upper-body", "Lower-body", "Dress"]
         garm_img = request.files['garm_img']
         vton_img = request.files['vton_img']
+        cat = request.form['category']
+        print(f'category is {cat}')
+        category =cloths_type[int(cat)] # Default to Upper-body if not specified
         # Save the uploaded files
         garm_path = os.path.join(UPLOAD_FOLDER, 'garm_input.png')
         output_image.save(img_byte_arr, format='PNG')
         img_byte_arr = img_byte_arr.getvalue()
+        # Send the final "complete" event via SSE
+        sse.publish({"message": "Processing complete"}, type='complete')
         return Response(img_byte_arr, mimetype='image/png')
     except Exception as e:
+# nohup gunicorn -b 0.0.0.0:5003 sentiment_api:app &

celery_worker.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from celery import Celery
+from flask import current_app
+from app2 import app, sse  # Import your Flask app and SSE
+import uuid
+import os
+# import threading
+celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL'])
+celery.conf.update(app.config)
+# OUTPUT_FOLDER = 'path/to/output/folder'
+# image_results = {}
+# image_results_lock = threading.Lock()
+# def create_progress_callback(session_id):
+#     def progress_callback(step, total_steps):
+#         progress = int((step + 1) / total_steps * 100)
+#         print(f"Publishing progress {progress} for session {session_id}")
+#         sse.publish({"progress": progress}, type='progress', channel=session_id)
+#     return progress_callback
+@celery.task(bind=True)
+def process_image(self, session_id, garm_path, vton_path, category):
+    try:
+        print(f"Starting process_image task for session {session_id}")
+        progress_callback = create_progress_callback(session_id)
+        output_images = process_dc(garm_img=garm_path,
+                                   vton_img=vton_path,
+                                   category=category,
+                                   progress_callback=progress_callback)
+        if not output_images:
+            sse.publish({"error": "No output image generated"}, type='error', channel=session_id)
+            return None
+        output_image = output_images[0]
+        # Generate a UUID for the output image
+        image_uuid = str(uuid.uuid4())
+        # Create the output filename with the UUID
+        output_filename = f"{image_uuid}.png"
+        output_path = os.path.join(OUTPUT_FOLDER, output_filename)
+        # Save the output image
+        output_image.save(output_path, format='PNG')
+        # Add the UUID and path to the image_results map
+        with image_results_lock:
+            image_results[image_uuid] = output_path
+        sse.publish({"message": "Processing complete", "uuid": image_uuid}, type='complete', channel=session_id)
+        return image_uuid
+    except Exception as e:
+        sse.publish({"error": str(e)}, type='error', channel=session_id)
+        return print(f"panic in process_image: {str(e)}")

ootd/inference_ootd_dc.py CHANGED Viewed

@@ -99,6 +99,9 @@ class OOTDiffusionDC:
                 num_steps=20,
                 image_scale=1.0,
                 seed=-1,
     ):
         if seed == -1:
             random.seed(time.time())
@@ -128,6 +131,9 @@ class OOTDiffusionDC:
                         image_guidance_scale=image_scale,
                         num_images_per_prompt=num_samples,
                         generator=generator,
             ).images
         return images

                 num_steps=20,
                 image_scale=1.0,
                 seed=-1,
+                progress_callback=None,
+                progress_interval=5
     ):
         if seed == -1:
             random.seed(time.time())
                         image_guidance_scale=image_scale,
                         num_images_per_prompt=num_samples,
                         generator=generator,
+                        progress_callback=progress_callback,
+                        progress_interval=progress_interval,
             ).images
         return images

ootd/pipelines_ootd/pipeline_ootd.py CHANGED Viewed

@@ -167,6 +167,12 @@ class OotdPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMix
         return_dict: bool = True,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         **kwargs,
     ):
         r"""
@@ -362,83 +368,88 @@ class OotdPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMix
             return_dict=False,
         )
-        with self.progress_bar(total=num_inference_steps) as progress_bar:
-            for i, t in enumerate(timesteps):
-                latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
-                # concat latents, image_latents in the channel dimension
-                scaled_latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
-                latent_vton_model_input = torch.cat([scaled_latent_model_input, vton_latents], dim=1)
-                # latent_vton_model_input = scaled_latent_model_input + vton_latents
-                spatial_attn_inputs = spatial_attn_outputs.copy()
-                # predict the noise residual
-                noise_pred = self.unet_vton(
-                    latent_vton_model_input,
-                    spatial_attn_inputs,
-                    t,
-                    encoder_hidden_states=prompt_embeds,
-                    return_dict=False,
-                )[0]
-                # Hack:
-                # For karras style schedulers the model does classifer free guidance using the
-                # predicted_original_sample instead of the noise_pred. So we need to compute the
-                # predicted_original_sample here if we are using a karras style scheduler.
-                if scheduler_is_in_sigma_space:
-                    step_index = (self.scheduler.timesteps == t).nonzero()[0].item()
-                    sigma = self.scheduler.sigmas[step_index]
-                    noise_pred = latent_model_input - sigma * noise_pred
-                # perform guidance
-                if self.do_classifier_free_guidance:
-                    noise_pred_text_image, noise_pred_text = noise_pred.chunk(2)
-                    noise_pred = (
-                        noise_pred_text
-                        + self.image_guidance_scale * (noise_pred_text_image - noise_pred_text)
-                    )
-                # Hack:
-                # For karras style schedulers the model does classifer free guidance using the
-                # predicted_original_sample instead of the noise_pred. But the scheduler.step function
-                # expects the noise_pred and computes the predicted_original_sample internally. So we
-                # need to overwrite the noise_pred here such that the value of the computed
-                # predicted_original_sample is correct.
-                if scheduler_is_in_sigma_space:
-                    noise_pred = (noise_pred - latents) / (-sigma)
-                # compute the previous noisy sample x_t -> x_t-1
-                latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
-                init_latents_proper = image_ori_latents * self.vae.config.scaling_factor
-                # repainting
-                if i < len(timesteps) - 1:
-                    noise_timestep = timesteps[i + 1]
-                    init_latents_proper = self.scheduler.add_noise(
-                        init_latents_proper, noise, torch.tensor([noise_timestep])
-                    )
-                latents = (1 - mask_latents) * init_latents_proper + mask_latents * latents
-                if callback_on_step_end is not None:
-                    callback_kwargs = {}
-                    for k in callback_on_step_end_tensor_inputs:
-                        callback_kwargs[k] = locals()[k]
-                    callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
-                    latents = callback_outputs.pop("latents", latents)
-                    prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
-                    negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
-                    vton_latents = callback_outputs.pop("vton_latents", vton_latents)
-                # call the callback, if provided
-                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
-                    progress_bar.update()
-                    if callback is not None and i % callback_steps == 0:
-                        step_idx = i // getattr(self.scheduler, "order", 1)
-                        callback(step_idx, t, latents)
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]

         return_dict: bool = True,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+        progress_callback: Optional[Callable[[int, int], None]] = None,
+        progress_interval=5,
         **kwargs,
     ):
         r"""
             return_dict=False,
         )
+        # with self.progress_bar(total=num_inference_steps) as progress_bar:
+        for i, t in enumerate(timesteps):
+            latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
+            # concat latents, image_latents in the channel dimension
+            scaled_latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+            latent_vton_model_input = torch.cat([scaled_latent_model_input, vton_latents], dim=1)
+            # latent_vton_model_input = scaled_latent_model_input + vton_latents
+            spatial_attn_inputs = spatial_attn_outputs.copy()
+            # predict the noise residual
+            noise_pred = self.unet_vton(
+                latent_vton_model_input,
+                spatial_attn_inputs,
+                t,
+                encoder_hidden_states=prompt_embeds,
+                return_dict=False,
+            )[0]
+            # Hack:
+            # For karras style schedulers the model does classifer free guidance using the
+            # predicted_original_sample instead of the noise_pred. So we need to compute the
+            # predicted_original_sample here if we are using a karras style scheduler.
+            if scheduler_is_in_sigma_space:
+                step_index = (self.scheduler.timesteps == t).nonzero()[0].item()
+                sigma = self.scheduler.sigmas[step_index]
+                noise_pred = latent_model_input - sigma * noise_pred
+            # perform guidance
+            if self.do_classifier_free_guidance:
+                noise_pred_text_image, noise_pred_text = noise_pred.chunk(2)
+                noise_pred = (
+                    noise_pred_text
+                    + self.image_guidance_scale * (noise_pred_text_image - noise_pred_text)
+                )
+            # Hack:
+            # For karras style schedulers the model does classifer free guidance using the
+            # predicted_original_sample instead of the noise_pred. But the scheduler.step function
+            # expects the noise_pred and computes the predicted_original_sample internally. So we
+            # need to overwrite the noise_pred here such that the value of the computed
+            # predicted_original_sample is correct.
+            if scheduler_is_in_sigma_space:
+                noise_pred = (noise_pred - latents) / (-sigma)
+            # compute the previous noisy sample x_t -> x_t-1
+            latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+            init_latents_proper = image_ori_latents * self.vae.config.scaling_factor
+            # repainting
+            if i < len(timesteps) - 1:
+                noise_timestep = timesteps[i + 1]
+                init_latents_proper = self.scheduler.add_noise(
+                    init_latents_proper, noise, torch.tensor([noise_timestep])
+                )
+            latents = (1 - mask_latents) * init_latents_proper + mask_latents * latents
+            if callback_on_step_end is not None:
+                callback_kwargs = {}
+                for k in callback_on_step_end_tensor_inputs:
+                    callback_kwargs[k] = locals()[k]
+                callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
+                latents = callback_outputs.pop("latents", latents)
+                prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
+                negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+                vton_latents = callback_outputs.pop("vton_latents", vton_latents)
+            if progress_callback is not None and i % progress_interval == 0:
+                progress_callback(i, num_inference_steps)
+            # call the callback, if provided
+            if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+                # progress_bar.update()
+                if callback is not None and i % callback_steps == 0:
+                    step_idx = i // getattr(self.scheduler, "order", 1)
+                    callback(step_idx, t, latents)
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]

preprocess/humanparsing/parsing_api.py CHANGED Viewed

@@ -16,6 +16,66 @@ from tqdm import tqdm
 from PIL import Image
 def get_palette(num_cls):
     """ Returns the color map for visualizing the segmentation mask.
     Args:
@@ -182,6 +242,16 @@ def onnx_inference(session, lip_session, input_dir):
     output_img.putpalette(palette)
     face_mask = torch.from_numpy((parsing_result == 11).astype(np.float32))
     return output_img, face_mask

 from PIL import Image
+def colorize_parsing(parsing_result):
+    label_map = {
+        0: "background", 1: "hat", 2: "hair", 3: "sunglasses", 4: "upper_clothes",
+        5: "skirt", 6: "pants", 7: "dress", 8: "belt", 9: "left_shoe",
+        10: "right_shoe", 11: "head", 12: "left_leg", 13: "right_leg",
+        14: "left_arm", 15: "right_arm", 16: "bag", 17: "scarf"
+    }
+    # Define colors for each part (RGB)
+    color_map = {
+        0: (0, 0, 0),       # Background
+        1: (128, 0, 0),     # Hat
+        2: (255, 0, 0),     # Hair
+        3: (0, 255, 0),     # Sunglasses
+        4: (0, 0, 255),     # Upper-clothes
+        5: (255, 255, 0),   # Skirt
+        6: (255, 0, 255),   # Pants
+        7: (0, 255, 255),   # Dress
+        8: (128, 128, 0),   # Belt
+        9: (0, 128, 128),   # Left-shoe
+        10: (128, 0, 128),  # Right-shoe
+        11: (128, 128, 128),# Head
+        12: (64, 0, 0),     # Left-leg
+        13: (192, 0, 0),    # Right-leg
+        14: (64, 128, 0),   # Left-arm
+        15: (192, 128, 0),  # Right-arm
+        16: (64, 0, 128),   # Bag
+        17: (192, 0, 128),  # Scarf
+    }
+    height, width = parsing_result.shape
+    colored_parsing = np.zeros((height, width, 3), dtype=np.uint8)
+    for label, color in color_map.items():
+        colored_parsing[parsing_result == label] = color
+    return colored_parsing
+def add_numbers_to_image(colored_parsing, parsing_result):
+    label_map = {
+        0: "background", 1: "hat", 2: "hair", 3: "sunglasses", 4: "upper_clothes",
+        5: "skirt", 6: "pants", 7: "dress", 8: "belt", 9: "left_shoe",
+        10: "right_shoe", 11: "head", 12: "left_leg", 13: "right_leg",
+        14: "left_arm", 15: "right_arm", 16: "bag", 17: "scarf"
+    }
+    height, width = parsing_result.shape
+    numbered_image = colored_parsing.copy()
+    for label in range(18):  # 0 to 17
+        mask = (parsing_result == label)
+        if np.any(mask):
+            y, x = np.where(mask)
+            center_y, center_x = int(np.mean(y)), int(np.mean(x))
+            cv2.putText(numbered_image, str(label), (center_x, center_y),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
+    return numbered_image
 def get_palette(num_cls):
     """ Returns the color map for visualizing the segmentation mask.
     Args:
     output_img.putpalette(palette)
     face_mask = torch.from_numpy((parsing_result == 11).astype(np.float32))
+    # Colorize the parsing result
+    colored_parsing = colorize_parsing(parsing_result)
+    # Add numbers to the colorized image
+    numbered_parsing = add_numbers_to_image(colored_parsing, parsing_result)
+    # Save the numbered parsing result
+    output_filename =  "colored_parsing.png"
+    cv2.imwrite(output_filename, cv2.cvtColor(numbered_parsing, cv2.COLOR_RGB2BGR))
     return output_img, face_mask

preprocess/openpose/run_openpose.py CHANGED Viewed

@@ -80,8 +80,8 @@ class OpenPose:
             # output_image = cv2.resize(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB), (768, 1024))
             # cv2.imwrite('/home/aigc/ProjectVTON/OpenPose/keypoints/out_pose.jpg', output_image)
-        # return keypoints
-        return keypoints, candidate, subset
 if __name__ == '__main__':

             # output_image = cv2.resize(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB), (768, 1024))
             # cv2.imwrite('/home/aigc/ProjectVTON/OpenPose/keypoints/out_pose.jpg', output_image)
+        return keypoints
+        # return keypoints, candidate, subset
 if __name__ == '__main__':

requirements.txt CHANGED Viewed

@@ -16,4 +16,6 @@ config==0.5.1
 einops==0.7.0
 onnxruntime==1.16.2
 basicsr
-onnxruntime-gpu==1.18.0

 einops==0.7.0
 onnxruntime==1.16.2
 basicsr
+onnxruntime-gpu==1.18.0
+Flask
+Flask-Cors

run/cloths_db.py CHANGED Viewed

@@ -62,8 +62,8 @@ cloths_map= {
 modeL_db= {
 '051962_0.jpg': "no-dress",
 '052472_0.jpg': "dress",
-'02783_00.jpg': 0,
-'09933_00.jpg': 0,
 '053228_0.jpg': "dress",
 '051482_0.jpg': "no-dress",
 'model_1.png': 0,

 modeL_db= {
 '051962_0.jpg': "no-dress",
 '052472_0.jpg': "dress",
+'02783_00.jpg': "dress",
+'09933_00.jpg': "dress",
 '053228_0.jpg': "dress",
 '051482_0.jpg': "no-dress",
 'model_1.png': 0,

run/gradio_ootd.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 from pathlib import Path
 import sys
 import torch
-from PIL import Image, ImageOps
 import numpy as np
 from utils_ootd import get_mask_location
 from cloths_db import cloths_map, modeL_db
@@ -20,11 +20,12 @@ from preprocess.openpose.annotator.openpose.util import draw_bodypose
 # torch.set_default_dtype(torch.float16)
-openpose_model_hd = OpenPose(0)
 parsing_model_hd = Parsing(0)
 ootd_model_hd = OOTDiffusionHD(0)
-openpose_model_dc = OpenPose(0)
 parsing_model_dc = Parsing(0)
 ootd_model_dc = OOTDiffusionDC(0)
@@ -41,11 +42,21 @@ garment_hd = os.path.join(example_path, 'garment/03244_00.jpg')
 model_dc = os.path.join(example_path, 'model/model_8.png')
 garment_dc = os.path.join(example_path, 'garment/048554_1.jpg')
-openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
 ootd_model_dc.pipe.to('cuda')
 ootd_model_dc.image_encoder.to('cuda')
 ootd_model_dc.text_encoder.to('cuda')
 def convert_to_image(image_array):
     if isinstance(image_array, np.ndarray):
         # Normalize the data to the range [0, 255]
@@ -69,14 +80,24 @@ def process_hd(vton_img, garm_img, n_samples, n_steps, image_scale, seed):
     category = 0 # 0:upperbody; 1:lowerbody; 2:dress
     with torch.no_grad():
-        openpose_model_hd.preprocessor.body_estimation.model.to('cuda')
-        ootd_model_hd.pipe.to('cuda')
-        ootd_model_hd.image_encoder.to('cuda')
-        ootd_model_hd.text_encoder.to('cuda')
         garm_img = Image.open(garm_img).resize((768, 1024))
         vton_img = Image.open(vton_img).resize((768, 1024))
-        keypoints = openpose_model_hd(vton_img.resize((384, 512)))
         model_parse, _ = parsing_model_hd(vton_img.resize((384, 512)))
         mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
@@ -94,123 +115,188 @@ def process_hd(vton_img, garm_img, n_samples, n_steps, image_scale, seed):
             image_ori=vton_img,
             num_samples=n_samples,
             num_steps=n_steps,
-            image_scale=image_scale,
-            seed=seed,
         )
     return images
-# @spaces.GPU
-def process_dc(vton_img, garm_img, category):
-    model_type = 'dc'
-    if category == 'Upper-body':
-        category = 0
-    elif category == 'Lower-body':
-        category = 1
-    else:
-        category =2
-    with torch.no_grad():
-        # openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
-        # ootd_model_dc.pipe.to('cuda')
-        # ootd_model_dc.image_encoder.to('cuda')
-        # ootd_model_dc.text_encoder.to('cuda')
-        garm_img = Image.open(garm_img).resize((768, 1024))
-        vton_img = Image.open(vton_img).resize((768, 1024))
-        keypoints ,candidate , subset = openpose_model_dc(vton_img.resize((384, 512)))
-        # print(len(keypoints["pose_keypoints_2d"]))
-        # print(keypoints["pose_keypoints_2d"])
-        # person_image = np.asarray(vton_img)
-        # print(len(person_image))
-        # person_image = np.asarray(Image.open(vton_img).resize((768, 1024)))
-        # output = draw_bodypose(canvas=person_image,candidate=candidate, subset=subset )
-        # output_image = Image.fromarray(output)
-        # output_image.save('keypose.png')
-        left_point = keypoints["pose_keypoints_2d"][2]
-        right_point = keypoints["pose_keypoints_2d"][5]
-        neck_point = keypoints["pose_keypoints_2d"][1]
-        hip_point = keypoints["pose_keypoints_2d"][8]
-        print(f'left shoulder - {left_point}')
-        print(f'right shoulder - {right_point}')
-        # #find disctance using Euclidian distance
-        shoulder_width_pixels = round(np.sqrt( np.power((right_point[0]-left_point[0]),2) + np.power((right_point[1]-left_point[1]),2)),2)
-        height_pixels  = round(np.sqrt( np.power((neck_point[0]-hip_point[0]),2) + np.power((neck_point[1]-hip_point[1]),2)),2) *2
-        # # Assuming an average human height
-        average_height_cm = 172.72 *1.5
-        # Conversion factor from pixels to cm
-        conversion_factor = average_height_cm / height_pixels
-        # Convert shoulder width to real-world units
-        shoulder_width_cm = shoulder_width_pixels * conversion_factor
-        print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
-        print(f'Estimated height (in pixels): {height_pixels}')
-        print(f'Conversion factor (pixels to cm): {conversion_factor}')
-        print(f'Shoulder width (in cm): {shoulder_width_cm}')
-        print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
-        model_parse, face_mask = parsing_model_dc(vton_img.resize((384, 512)))
-        model_parse_image = convert_to_image(model_parse)
-        face_mask_image = convert_to_image(face_mask)
-        # Save the images
-        model_parse_image.save('model_parse_image.png')
-        face_mask_image.save('face_mask_image.png')
-        mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
-        # final_mask = convert_to_image(mask)
-        # final_mask.save("final_mask.png")
-        # final_mask_grat = convert_to_image(mask_gray)
-        # final_mask_grat.save("final_mask_grat.png")
-        mask = mask.resize((768, 1024), Image.NEAREST)
-        mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
-        # Save the resized masks
-        mask.save("mask_resized.png")
-        mask_gray.save("mask_gray_resized.png")
-        masked_vton_img = Image.composite(mask_gray, vton_img, mask)
         masked_vton_img.save("masked_vton_img.png")
         images = ootd_model_dc(
             model_type=model_type,
             category=category_dict[category],
             image_garm=garm_img,
             image_vton=masked_vton_img,
-            mask=mask,
             image_ori=vton_img,
             num_samples=1,
             num_steps=10,
             image_scale=  2.0,
             seed=-1,
         )
     return images
 # is_upper = False
@@ -218,6 +304,8 @@ def process_dc(vton_img, garm_img, category):
 block = gr.Blocks().queue()
 with block:
     with gr.Row():
         gr.Markdown("# ")
@@ -258,6 +346,7 @@ with block:
                 os.path.join(example_path, 'model/051918_0.jpg'),
                 os.path.join(example_path, 'model/051962_0.jpg'),
                 os.path.join(example_path, 'model/049205_0.jpg'),
             ],
             )
@@ -273,7 +362,7 @@ with block:
                 os.path.join(example_path, 'model/052472_0.jpg'),
                 os.path.join(example_path, 'model/053514_0.jpg'),
                 os.path.join(example_path, 'model/053228_0.jpg'),
-                os.path.join(example_path, 'model/052964_0.jpg'),
                 os.path.join(example_path, 'model/053700_0.jpg'),
             ],
@@ -348,9 +437,17 @@ with block:
                     os.path.join(garment_path, '053319_1.jpg'),
                     os.path.join(garment_path, '052234_1.jpg'),
                 ])
-        with gr.Column():
-            result_gallery_dc = gr.Gallery(label='Output', show_label=False, elem_id="gallery", preview=True, scale=1)
     with gr.Column():
         run_button_dc = gr.Button(value="Run")
         # n_samples_dc = gr.Slider(label="Images", minimum=1, maximum=4, value=1, step=1)
         # n_steps_dc = gr.Slider(label="Steps", minimum=20, maximum=40, value=20, step=1)
@@ -359,10 +456,33 @@ with block:
         # seed_dc = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, value=-1)
     # ips_dc = [vton_img_dc, garm_img_dc, category_dc]
     ips_dc = [vton_img_dc, garm_img_dc ,category_dc]
-    run_button_dc.click(fn=process_dc, inputs=ips_dc, outputs=[result_gallery_dc])
 block.launch(server_name="0.0.0.0", server_port=7860 )

 from pathlib import Path
 import sys
 import torch
+from PIL import Image, ImageOps , ImageDraw
 import numpy as np
 from utils_ootd import get_mask_location
 from cloths_db import cloths_map, modeL_db
 # torch.set_default_dtype(torch.float16)
+openpose_model = OpenPose(0)
 parsing_model_hd = Parsing(0)
 ootd_model_hd = OOTDiffusionHD(0)
 parsing_model_dc = Parsing(0)
 ootd_model_dc = OOTDiffusionDC(0)
 model_dc = os.path.join(example_path, 'model/model_8.png')
 garment_dc = os.path.join(example_path, 'garment/048554_1.jpg')
+openpose_model.preprocessor.body_estimation.model.to('cuda')
+#model dc
 ootd_model_dc.pipe.to('cuda')
 ootd_model_dc.image_encoder.to('cuda')
 ootd_model_dc.text_encoder.to('cuda')
+#model hd
+# ootd_model_hd.pipe.to('cuda')
+# ootd_model_hd.image_encoder.to('cuda')
+# ootd_model_hd.text_encoder.to('cuda')
 def convert_to_image(image_array):
     if isinstance(image_array, np.ndarray):
         # Normalize the data to the range [0, 255]
     category = 0 # 0:upperbody; 1:lowerbody; 2:dress
     with torch.no_grad():
+        # openpose_model_hd.preprocessor.body_estimation.model.to('cuda')
+        # ootd_model_hd.pipe.to('cuda')
+        # ootd_model_hd.image_encoder.to('cuda')
+        # ootd_model_hd.text_encoder.to('cuda')
+        # garm_img = Image.open(garm_img).resize((768, 1024))
+        # vton_img = Image.open(vton_img).resize((768, 1024))
+        # keypoints = openpose_model(vton_img.resize((384, 512)))
         garm_img = Image.open(garm_img).resize((768, 1024))
         vton_img = Image.open(vton_img).resize((768, 1024))
+        keypoints ,candidate , subset = openpose_model(vton_img.resize((384, 512)))
+        print(len(keypoints["pose_keypoints_2d"]))
+        print(keypoints["pose_keypoints_2d"])
         model_parse, _ = parsing_model_hd(vton_img.resize((384, 512)))
         mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
             image_ori=vton_img,
             num_samples=n_samples,
             num_steps=n_steps,
+            image_scale=2.0,
+            seed=42,
         )
     return images
+def create_bw_mask(size):
+    """Return an 'L'-mode image of ``size`` (width, height): white top half, black bottom half."""
+    w, h = size
+    split_y = h // 2
+    canvas = Image.new('L', (w, h))
+    pen = ImageDraw.Draw(canvas)
+    # Drawing order is kept as-is: both rectangles touch row `split_y`, and the
+    # black one is drawn last, so that row ends up black.
+    pen.rectangle([0, 0, w, split_y], fill=255)  # top half white
+    pen.rectangle([0, split_y, w, h], fill=0)  # bottom half black
+    return canvas
+def create_mask(vton_img, garm_img, category):
+        """Build the mask / gray-mask pair for a model image so it can be edited before try-on.
+
+        Args:
+            vton_img: Person image source passed straight to ``Image.open``.
+            garm_img: Garment image source; it is opened and resized here but not
+                used afterwards in this function.
+            category: 'Upper-body' or 'Lower-body'; any other value selects the dress branch.
+
+        Returns:
+            ``([mask, mask_gray], mask, mask_gray)`` - the pair as a list followed by
+            each image on its own, both resized to 768x1024.
+
+        Side effects: prints pose / measurement debug output and writes
+        ``model_parse_image.png``, ``face_mask_image.png``, ``mask_resized.png`` and
+        ``mask_gray_resized.png`` into the current working directory.
+        """
+        model_type = 'dc'
+        # Translate the UI label into an index for category_dict_utils.
+        if category == 'Upper-body':
+            category = 0
+        elif category == 'Lower-body':
+            category = 1
+        else:
+            category =2
+        with torch.no_grad():
+            # openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
+            # ootd_model_dc.pipe.to('cuda')
+            # ootd_model_dc.image_encoder.to('cuda')
+            # ootd_model_dc.text_encoder.to('cuda')
+            # NOTE(review): garm_img is loaded but never read below, so a missing
+            # garment makes mask creation fail for no functional reason - confirm intent.
+            garm_img = Image.open(garm_img).resize((768, 1024))
+            vton_img = Image.open(vton_img).resize((768, 1024))
+            # NOTE(review): another call site in this file unpacks three values from
+            # openpose_model(...); here a single mapping is expected - confirm which
+            # return shape run_openpose.py actually provides.
+            keypoints  = openpose_model(vton_img.resize((384, 512)))
+            print(len(keypoints["pose_keypoints_2d"]))
+            print(keypoints["pose_keypoints_2d"])
+            # person_image = np.asarray(vton_img)
+            # print(len(person_image))
+            # person_image = np.asarray(Image.open(vton_img).resize((768, 1024)))
+            # output = draw_bodypose(canvas=person_image,candidate=candidate, subset=subset )
+            # output_image = Image.fromarray(output)
+            # output_image.save('keypose.png')
+            # Keypoint indices 2/5 are treated as the shoulders and 1/8 as neck/hip
+            # (per the variable names) - TODO confirm against the pose model's keypoint order.
+            left_point = keypoints["pose_keypoints_2d"][2]
+            right_point = keypoints["pose_keypoints_2d"][5]
+            neck_point = keypoints["pose_keypoints_2d"][1]
+            hip_point = keypoints["pose_keypoints_2d"][8]
+            print(f'left shoulder - {left_point}')
+            print(f'right shoulder - {right_point}')
+            # Find the distances using the Euclidean distance.
+            shoulder_width_pixels = round(np.sqrt( np.power((right_point[0]-left_point[0]),2) + np.power((right_point[1]-left_point[1]),2)),2)
+            # "Height" here is twice the neck-to-hip distance.
+            height_pixels  = round(np.sqrt( np.power((neck_point[0]-hip_point[0]),2) + np.power((neck_point[1]-hip_point[1]),2)),2) *2
+            # Assuming an average human height (the 1.5 scale factor is unexplained - TODO confirm).
+            average_height_cm = 172.72 *1.5
+            # Conversion factor from pixels to cm
+            conversion_factor = average_height_cm / height_pixels
+            # Convert shoulder width to real-world units
+            # The measurements below are only printed; they do not influence the returned masks.
+            shoulder_width_cm = shoulder_width_pixels * conversion_factor
+            print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
+            print(f'Estimated height (in pixels): {height_pixels}')
+            print(f'Conversion factor (pixels to cm): {conversion_factor}')
+            print(f'Shoulder width (in cm): {shoulder_width_cm}')
+            print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
+            model_parse, face_mask = parsing_model_dc(vton_img.resize((384, 512)))
+            model_parse_image = convert_to_image(model_parse)
+            face_mask_image = convert_to_image(face_mask)
+            # Save the images
+            model_parse_image.save('model_parse_image.png')
+            face_mask_image.save('face_mask_image.png')
+            mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
+            # up_mask, up_mask_gray = get_mask_location(model_type, category_dict_utils[0], model_parse, keypoints)
+            # lo_mask, lo_mask_gray = get_mask_location(model_type, category_dict_utils[1], model_parse, keypoints)
+            # mask = Image.composite(up_mask,lo_mask,up_mask)
+            # mask_gray = Image.composite(up_mask_gray, lo_mask_gray,up_mask)
+            # Bring both masks up to the 768x1024 working resolution.
+            mask = mask.resize((768, 1024), Image.NEAREST)
+            mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
+            # if modeL_db[vton_img] == 0:
+            # Create a black-and-white mask
+            bw_mask = create_bw_mask((768, 1024))
+            # Create an empty black image in mode 'L'.
+            temp_img = Image.new("L", (768, 1024), 0)
+            # Keep `mask` only where bw_mask is white (its top half); the rest becomes black.
+            # NOTE(review): this runs for every category, including 'Lower-body';
+            # the commented-out condition above suggests it was meant to be conditional.
+            mask = Image.composite(mask, temp_img, bw_mask)
+            # print(mask)
+            # Save the resized masks
+            mask.save("mask_resized.png")
+            mask_gray.save("mask_gray_resized.png")
+            return [mask, mask_gray], mask, mask_gray
+# @spaces.GPU
+def _require_composite(editor_value, label):
+    """Return the 'composite' image of an ImageEditor value, or raise a user-facing error.
+
+    The editor value is None until something has been loaded into it, and its
+    'composite' entry may be None as well, so both cases are reported through
+    gr.Error instead of failing later with a TypeError / AttributeError.
+    """
+    composite = editor_value.get('composite') if isinstance(editor_value, dict) else None
+    if composite is None:
+        raise gr.Error(f'{label} is empty - click "Create Mask" before "Run".')
+    return composite
+def process_dc(vton_img, garm_img, category, mask, mask_gray):
+    """Run the DC try-on model using the (possibly hand-edited) masks from the UI.
+
+    Args:
+        vton_img: Person image source passed to ``Image.open``.
+        garm_img: Garment image source passed to ``Image.open``.
+        category: 'Upper-body' or 'Lower-body'; any other value selects the dress branch.
+        mask: ImageEditor value whose 'composite' entry is the edited mask.
+        mask_gray: ImageEditor value whose 'composite' entry is the edited gray mask.
+
+    Returns:
+        Whatever ``ootd_model_dc`` returns for a single sample.
+
+    Raises:
+        gr.Error: if an input image or either mask is missing.
+
+    Side effects: prints debug output and writes ``masked_vton_img.png`` to the
+    current working directory.
+    """
+    model_type = 'dc'
+    # Translate the UI label into an index for category_dict.
+    if category == 'Upper-body':
+        category = 0
+    elif category == 'Lower-body':
+        category = 1
+    else:
+        category = 2
+    # Validate inputs before touching the filesystem or the GPU models.
+    if vton_img is None or garm_img is None:
+        raise gr.Error('Select both a model image and a garment image before running.')
+    # Extract the composite images from the edit data
+    edited_mask = _require_composite(mask, 'Mask')
+    edited_mask_gray = _require_composite(mask_gray, 'Gray mask')
+    garm_img = Image.open(garm_img).resize((768, 1024))
+    vton_img = Image.open(vton_img).resize((768, 1024))
+    with torch.no_grad():
+        # Ensure both masks are in 'L' mode (grayscale)
+        if edited_mask.mode != 'L':
+            edited_mask = edited_mask.convert('L')
+        if edited_mask_gray.mode != 'L':
+            edited_mask_gray = edited_mask_gray.convert('L')
+        # Ensure all images and masks are the same size
+        edited_mask = edited_mask.resize((768, 1024), Image.NEAREST)
+        edited_mask_gray = edited_mask_gray.resize((768, 1024), Image.NEAREST)
+        print(f'mask: {edited_mask}')
+        print(f'vton_img: {vton_img}')
+        # Paste the gray mask over the person image wherever the mask is set.
+        masked_vton_img = Image.composite(edited_mask_gray, vton_img, edited_mask)
         masked_vton_img.save("masked_vton_img.png")
+        print(f'category is {category}')
         images = ootd_model_dc(
             model_type=model_type,
             category=category_dict[category],
             image_garm=garm_img,
             image_vton=masked_vton_img,
+            mask=edited_mask,
             image_ori=vton_img,
             num_samples=1,
             num_steps=10,
             image_scale=  2.0,
             seed=-1,
         )
     return images
 # is_upper = False
 block = gr.Blocks().queue()
 with block:
+    mask_state = gr.State()
+    mask_gray_state = gr.State()
     with gr.Row():
         gr.Markdown("# ")
                 os.path.join(example_path, 'model/051918_0.jpg'),
                 os.path.join(example_path, 'model/051962_0.jpg'),
                 os.path.join(example_path, 'model/049205_0.jpg'),
+                os.path.join(example_path, 'model/05997_00.jpg'),
             ],
             )
                 os.path.join(example_path, 'model/052472_0.jpg'),
                 os.path.join(example_path, 'model/053514_0.jpg'),
                 os.path.join(example_path, 'model/053228_0.jpg'),
+                os.path.join(example_path, 'model/06802_00.jpg'),
                 os.path.join(example_path, 'model/053700_0.jpg'),
             ],
                     os.path.join(garment_path, '053319_1.jpg'),
                     os.path.join(garment_path, '052234_1.jpg'),
                 ])
     with gr.Column():
+        mask_gallery = gr.Gallery(label="Created Masks")
+        result_gallery_dc = gr.Gallery(label='Output', show_label=False, elem_id="gallery", preview=True, scale=1)
+    with gr.Row():
+        # Add ImageEditor for mask editing
+        mask_editor = gr.ImageEditor(label="Edit Mask", type="pil")
+        # Add ImageEditor for mask_gray editing
+        mask_gray_editor = gr.ImageEditor(label="Edit Mask Gray", type="pil")
+    with gr.Column():
+        create_mask_button = gr.Button(value="Create Mask")
         run_button_dc = gr.Button(value="Run")
         # n_samples_dc = gr.Slider(label="Images", minimum=1, maximum=4, value=1, step=1)
         # n_steps_dc = gr.Slider(label="Steps", minimum=20, maximum=40, value=20, step=1)
         # seed_dc = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, value=-1)
     # ips_dc = [vton_img_dc, garm_img_dc, category_dc]
     ips_dc = [vton_img_dc, garm_img_dc ,category_dc]
+    # create_mask_button.click(
+    #     fn=create_mask,
+    #     inputs=ips_dc,
+    #     outputs=[mask_gallery, mask_state, mask_gray_state]
+    # )
+    create_mask_button.click(
+        fn=create_mask,
+        inputs=ips_dc,
+        outputs=[mask_gallery, mask_editor, mask_gray_editor]
+    )
+    # run_button_dc.click(fn=process_dc, inputs=ips_dc, outputs=[result_gallery_dc])
+    # run_button_dc.click(
+    # fn=process_dc,
+    # inputs=ips_dc + [mask_state, mask_gray_state],
+    # outputs=[result_gallery_dc])
+    run_button_dc.click(
+    fn=process_dc,
+    inputs=[vton_img_dc, garm_img_dc, category_dc, mask_editor, mask_gray_editor],
+    outputs=[result_gallery_dc]
+    )
 block.launch(server_name="0.0.0.0", server_port=7860 )

run/testing.py ADDED Viewed

	@@ -0,0 +1,167 @@

+import os
+from pathlib import Path
+import sys
+import torch
+from PIL import Image, ImageOps
+import numpy as np
+from utils_ootd import get_mask_location
+from cloths_db import cloths_map, modeL_db
+PROJECT_ROOT = Path(__file__).absolute().parents[1].absolute()
+sys.path.insert(0, str(PROJECT_ROOT))
+from preprocess.openpose.run_openpose import OpenPose
+from preprocess.humanparsing.run_parsing import Parsing
+from ootd.inference_ootd_hd import OOTDiffusionHD
+from ootd.inference_ootd_dc import OOTDiffusionDC
+from preprocess.openpose.annotator.openpose.util import draw_bodypose
+# Module-level setup: the models are built once at import time.
+# The argument 0 is presumably a GPU index - TODO confirm against the constructors.
+openpose_model = OpenPose(0)
+parsing_model_dc = Parsing(0)
+ootd_model_dc = OOTDiffusionDC(0)
+# Category names indexed by 0/1/2: category_dict is in the spelling passed to
+# ootd_model_dc, category_dict_utils in the spelling passed to get_mask_location.
+category_dict = ['upperbody', 'lowerbody', 'dress']
+category_dict_utils = ['upper_body', 'lower_body', 'dresses']
+# Example assets live next to this script.
+example_path = os.path.join(os.path.dirname(__file__), 'examples')
+garment_path = os.path.join(os.path.dirname(__file__), 'examples','garment')
+# Move the pose estimator and the diffusion components to CUDA up front.
+openpose_model.preprocessor.body_estimation.model.to('cuda')
+ootd_model_dc.pipe.to('cuda')
+ootd_model_dc.image_encoder.to('cuda')
+ootd_model_dc.text_encoder.to('cuda')
+def process_dc(vton_img, garm_img, category):
+    """Debug harness: run pose + parsing + mask generation and dump the intermediates.
+
+    The diffusion call is commented out, so this function always returns None.
+    It prints pose / measurement information and writes ``mask_resized.png``,
+    ``mask_gray_resized.png`` and ``masked_vton_img.png`` to the current
+    working directory.
+
+    Args:
+        vton_img: Person image source passed to ``Image.open``.
+        garm_img: Garment image source; opened and resized, but only referenced
+            by the commented-out model call.
+        category: 'Upper-body' or 'Lower-body'; any other value selects the dress branch.
+    """
+    model_type = 'dc'
+    # Translate the label into an index for category_dict_utils.
+    if category == 'Upper-body':
+        category = 0
+    elif category == 'Lower-body':
+        category = 1
+    else:
+        category =2
+    with torch.no_grad():
+        # openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
+        # ootd_model_dc.pipe.to('cuda')
+        # ootd_model_dc.image_encoder.to('cuda')
+        # ootd_model_dc.text_encoder.to('cuda')
+        garm_img = Image.open(garm_img).resize((768, 1024))
+        vton_img = Image.open(vton_img).resize((768, 1024))
+        keypoints  = openpose_model(vton_img.resize((384, 512)))
+        print(len(keypoints["pose_keypoints_2d"]))
+        print(keypoints["pose_keypoints_2d"])
+        # person_image = np.asarray(vton_img)
+        # print(len(person_image))
+        # person_image = np.asarray(Image.open(vton_img).resize((768, 1024)))
+        # output = draw_bodypose(canvas=person_image,candidate=candidate, subset=subset )
+        # output_image = Image.fromarray(output)
+        # output_image.save('keypose.png')
+        # Keypoint indices 2/5 are treated as the shoulders and 1/8 as neck/hip
+        # (per the variable names) - TODO confirm against the pose model's keypoint order.
+        left_point = keypoints["pose_keypoints_2d"][2]
+        right_point = keypoints["pose_keypoints_2d"][5]
+        neck_point = keypoints["pose_keypoints_2d"][1]
+        hip_point = keypoints["pose_keypoints_2d"][8]
+        print(f'left shoulder - {left_point}')
+        print(f'right shoulder - {right_point}')
+        # Find the distances using the Euclidean distance.
+        shoulder_width_pixels = round(np.sqrt( np.power((right_point[0]-left_point[0]),2) + np.power((right_point[1]-left_point[1]),2)),2)
+        # "Height" here is twice the neck-to-hip distance.
+        height_pixels  = round(np.sqrt( np.power((neck_point[0]-hip_point[0]),2) + np.power((neck_point[1]-hip_point[1]),2)),2) *2
+        # Assuming an average human height (the 1.5 scale factor is unexplained - TODO confirm).
+        average_height_cm = 172.72 *1.5
+        # Conversion factor from pixels to cm
+        conversion_factor = average_height_cm / height_pixels
+        # Convert shoulder width to real-world units
+        # The measurements below are only printed; they do not affect the masks.
+        shoulder_width_cm = shoulder_width_pixels * conversion_factor
+        print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
+        print(f'Estimated height (in pixels): {height_pixels}')
+        print(f'Conversion factor (pixels to cm): {conversion_factor}')
+        print(f'Shoulder width (in cm): {shoulder_width_cm}')
+        print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
+        model_parse, face_mask = parsing_model_dc(vton_img.resize((384, 512)))
+        # model_parse_image = convert_to_image(model_parse)
+        # face_mask_image = convert_to_image(face_mask)
+        # Save the images
+        # model_parse_image.save('model_parse_image.png')
+        # face_mask_image.save('face_mask_image.png')
+        mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
+        # up_mask, up_mask_gray = get_mask_location(model_type, category_dict_utils[0], model_parse, keypoints)
+        # lo_mask, lo_mask_gray = get_mask_location(model_type, category_dict_utils[1], model_parse, keypoints)
+        # mask = Image.composite(up_mask,lo_mask,up_mask)
+        # mask_gray = Image.composite(up_mask_gray, lo_mask_gray,up_mask)
+        # Bring both masks up to the 768x1024 working resolution.
+        mask = mask.resize((768, 1024), Image.NEAREST)
+        mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
+        # Save the resized masks
+        mask.save("mask_resized.png")
+        mask_gray.save("mask_gray_resized.png")
+        # Paste the gray mask over the person image wherever the mask is set.
+        masked_vton_img = Image.composite(mask_gray, vton_img, mask)
+        masked_vton_img.save("masked_vton_img.png")
+        print(f'category is {category}')
+        # images = ootd_model_dc(
+        #     model_type=model_type,
+        #     category=category_dict[category],
+        #     image_garm=garm_img,
+        #     image_vton=masked_vton_img,
+        #     mask=mask,
+        #     image_ori=vton_img,
+        #     num_samples=1,
+        #     num_steps=10,
+        #     image_scale=  2.0,
+        #     seed=-1,
+        # )
+    # return None
+    return None
+# Script entry point: run the mask pipeline once on bundled example images.
+if __name__ == '__main__':
+    model_dc = os.path.join(example_path, 'model/model_8.png')
+    garment_dc = os.path.join(example_path, 'garment/048554_1.jpg')
+    # NOTE(review): process_dc compares `category` with the strings 'Upper-body' /
+    # 'Lower-body', so the integer 0 passed here falls through to the dress branch
+    # (index 2). Confirm that is intended; otherwise pass the string label.
+    # process_dc always returns None, so this prints "None" after the debug output.
+    print(process_dc(model_dc,garment_dc,0))

run/utils_ootd.py CHANGED Viewed

@@ -57,6 +57,8 @@ def get_mask_location(model_type, category, model_parse: Image.Image, keypoint:
     im_parse = model_parse.resize((width, height), Image.NEAREST)
     parse_array = np.array(im_parse)
     if model_type == 'hd':
         arm_width = 60
     elif model_type == 'dc':
@@ -81,9 +83,12 @@ def get_mask_location(model_type, category, model_parse: Image.Image, keypoint:
     arms = arms_left + arms_right
     if category == 'dresses':
         parse_mask = (parse_array == 7).astype(np.float32) + \
                      (parse_array == 4).astype(np.float32) + \
-                     (parse_array == 5).astype(np.float32) + \
                      (parse_array == 6).astype(np.float32)
         parser_mask_changeable += np.logical_and(parse_array, np.logical_not(parser_mask_fixed))

     im_parse = model_parse.resize((width, height), Image.NEAREST)
     parse_array = np.array(im_parse)
+    # print(parse_array.shape)
     if model_type == 'hd':
         arm_width = 60
     elif model_type == 'dc':
     arms = arms_left + arms_right
     if category == 'dresses':
+        # parse_mask = (parse_array == 5).astype(np.float32) # + \
+        #              #(parse_array == 6).astype(np.float32)
         parse_mask = (parse_array == 7).astype(np.float32) + \
                      (parse_array == 4).astype(np.float32) + \
+                     (parse_array == 5).astype(np.float32)  + \
                      (parse_array == 6).astype(np.float32)
         parser_mask_changeable += np.logical_and(parse_array, np.logical_not(parser_mask_fixed))