Ubuntu committed
Commit 6e6426e • 1 Parent(s): 3bc69b8

fixed issue with model with dress

.gitignore CHANGED
@@ -19,3 +19,18 @@ run/examples/model/male/male_side.png
 run/examples/model/male/male_small_38.png
 run/examples/model/male/male_small.png
 run/examples/model/male/male_xl_45.png
+__pycache__/app.cpython-310.pyc
+__pycache__/app2.cpython-310.pyc
+__pycache__/celery_worker.cpython-310.pyc
+ootd/pipelines_ootd/__pycache__/pipeline_ootd.cpython-310.pyc
+ootd/pipelines_ootd/__pycache__/pipeline_ootd.cpython-310.pyc
+ootd/pipelines_ootd/__pycache__/pipeline_ootd.cpython-310.pyc
+run/examples/garment/male_tshirt1.png
+colored_parsing.png
+gram_img.png
+vton_img.png
+temp_images/garm_input.png
+temp_images/vton_input.png
+
+ootd/pipelines_ootd/__pycache__/pipeline_ootd.cpython-310.pyc
+preprocess/humanparsing/datasets/__pycache__/simple_extractor_dataset.cpython-310.pyc
app.py CHANGED
@@ -3,6 +3,8 @@ from flask_cors import CORS
 import logging
 import gc
 import os
+from threading import Thread
+from flask_sse import sse
 from io import BytesIO
 from pathlib import Path
 import sys
@@ -19,6 +21,7 @@ from ootd.inference_ootd_dc import OOTDiffusionDC
 PROJECT_ROOT = Path(__file__).absolute().parents[1].absolute()
 sys.path.insert(0, str(PROJECT_ROOT))
 
+from queue import Queue
 
 
 #run python garbage collector and nvidia cuda clear memory
@@ -29,6 +32,8 @@ torch.cuda.empty_cache()
 # Setup Flask server
 app = Flask(__name__)
 CORS(app, origins="*") # Enable CORS for the entire app
+app.config["REDIS_URL"] = "redis://localhost:6379"
+app.register_blueprint(sse, url_prefix='/stream')
 
 
 logger = logging.getLogger()
@@ -58,15 +63,32 @@ if not os.path.exists(UPLOAD_FOLDER):
     os.makedirs(UPLOAD_FOLDER)
 
 
+# progress_queue = Queue()
+
+# def progress_callback(step, total_steps):
+#     if total_steps is not None and total_steps > 0:
+#         progress = int((step + 1) / total_steps * 100)
+#         progress_queue.put(progress)
+#     else:
+#         progress_queue.put(step + 1)
+
+def progress_callback(step, total_steps):
+    if total_steps is not None and total_steps > 0:
+        progress = int((step + 1) / total_steps * 100)
+        sse.publish({"progress": progress}, type='progress')
+    else:
+        sse.publish({"step": step + 1}, type='progress')
+
+
 def process_dc(vton_img, garm_img, category):
     model_type = 'dc'
 
-    # if category == 'Upper-body':
-    #     category = 0
-    # elif category == 'Lower-body':
-    #     category = 1
-    # else:
-    #     category = 2
+    if category == 'Upper-body':
+        category = 0
+    elif category == 'Lower-body':
+        category = 1
+    else:
+        category = 2
 
     with torch.no_grad():
         # openpose_model.preprocessor.body_estimation.model.to('cuda')
@@ -74,8 +96,8 @@ def process_dc(vton_img, garm_img, category):
         # ootd_model_dc.image_encoder.to('cuda')
         # ootd_model_dc.text_encoder.to('cuda')
 
-        garm_img = Image.open(garm_img).convert('RGB').resize((768, 1024))
-        vton_img = Image.open(vton_img).convert('RGB').resize((768, 1024))
+        garm_img = Image.open(garm_img).resize((768, 1024))
+        vton_img = Image.open(vton_img).resize((768, 1024))
         keypoints = openpose_model(vton_img.resize((384, 512)))
 
         print(len(keypoints["pose_keypoints_2d"]))
@@ -134,6 +156,19 @@ def process_dc(vton_img, garm_img, category):
 
         print(f'category is {category}')
 
+        # images = ootd_model_dc(
+        #     model_type=model_type,
+        #     category=category_dict[category],
+        #     image_garm=garm_img,
+        #     image_vton=masked_vton_img,
+        #     mask=mask,
+        #     image_ori=vton_img,
+        #     num_samples=3,
+        #     num_steps=20,
+        #     image_scale= 2.0,
+        #     seed=-1,
+        # )
+
         images = ootd_model_dc(
             model_type=model_type,
             category=category_dict[category],
@@ -141,12 +176,15 @@ def process_dc(vton_img, garm_img, category):
             image_vton=masked_vton_img,
             mask=mask,
             image_ori=vton_img,
-            num_samples=1,
+            num_samples=2,
            num_steps=10,
-            image_scale= 1.0,
-            seed=-1,
+            image_scale=2.0,
+            seed=42,
+            progress_callback=progress_callback,
+            progress_interval=1, # Update progress every step
         )
 
+
        return images
 
 
@@ -161,7 +199,9 @@ def root():
         response_data = {"message": "Internal server Error"}
         return jsonify(response_data), 500
 
-
+@app.route('/stream')
+def stream():
+    return Response(sse.stream(), content_type='text/event-stream')
 
 #write Flask api name "generate" with POST method that will input 2 images and return 1 image
 @app.route('/generate', methods=['POST'])
@@ -193,9 +233,14 @@ def generate():
     # category = 2
 
     try:
+        cloths_type = ["Upper-body", "Lower-body", "Dress"]
         garm_img = request.files['garm_img']
         vton_img = request.files['vton_img']
-        category = 0 # Default to Upper-body if not specified
+        cat = request.form['category']
+
+        print(f'category is {cat}')
+
+        category =cloths_type[int(cat)] # Default to Upper-body if not specified
 
         # Save the uploaded files
         garm_path = os.path.join(UPLOAD_FOLDER, 'garm_input.png')
@@ -222,6 +267,9 @@ def generate():
         output_image.save(img_byte_arr, format='PNG')
         img_byte_arr = img_byte_arr.getvalue()
 
+        # Send the final "complete" event via SSE
+        sse.publish({"message": "Processing complete"}, type='complete')
+
         return Response(img_byte_arr, mimetype='image/png')
 
     except Exception as e:
@@ -239,4 +287,4 @@ if __name__ == '__main__':
 
 
 
-# nohup gunicorn -b 0.0.0.0:5003 sentiment_api:app &
+# nohup gunicorn -b 0.0.0.0:5003 sentiment_api:app &
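For reference, a minimal client sketch for the two endpoints added above. The host and port are assumptions (the commented gunicorn line suggests 0.0.0.0:5003), the file and form field names mirror the diff, and this is an illustration rather than code from the commit:

# client_sketch.py -- assumes the Flask app from this commit is reachable on localhost:5003
import threading
import requests

BASE = "http://localhost:5003"

def watch_progress():
    # flask_sse emits a text/event-stream of "event:" / "data:" lines on /stream.
    with requests.get(f"{BASE}/stream", stream=True) as resp:
        for line in resp.iter_lines(decode_unicode=True):
            if line:
                print(line)  # e.g. "event: progress" then 'data: {"progress": 40}'

threading.Thread(target=watch_progress, daemon=True).start()

with open("model.png", "rb") as vton, open("garment.png", "rb") as garm:
    r = requests.post(
        f"{BASE}/generate",
        files={"vton_img": vton, "garm_img": garm},
        data={"category": "2"},  # index into ["Upper-body", "Lower-body", "Dress"]
    )
r.raise_for_status()
with open("result.png", "wb") as out:
    out.write(r.content)  # the endpoint returns the generated PNG bytes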
celery_worker.py ADDED
@@ -0,0 +1,63 @@
+from celery import Celery
+from flask import current_app
+from app2 import app, sse  # Import your Flask app and SSE
+import uuid
+import os
+
+# import threading
+celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL'])
+celery.conf.update(app.config)
+
+
+
+# OUTPUT_FOLDER = 'path/to/output/folder'
+# image_results = {}
+# image_results_lock = threading.Lock()
+
+
+# def create_progress_callback(session_id):
+#     def progress_callback(step, total_steps):
+#         progress = int((step + 1) / total_steps * 100)
+#         print(f"Publishing progress {progress} for session {session_id}")
+#         sse.publish({"progress": progress}, type='progress', channel=session_id)
+#     return progress_callback
+
+@celery.task(bind=True)
+def process_image(self, session_id, garm_path, vton_path, category):
+    try:
+        print(f"Starting process_image task for session {session_id}")
+
+        progress_callback = create_progress_callback(session_id)
+
+        output_images = process_dc(garm_img=garm_path,
+                                   vton_img=vton_path,
+                                   category=category,
+                                   progress_callback=progress_callback)
+
+        if not output_images:
+            sse.publish({"error": "No output image generated"}, type='error', channel=session_id)
+            return None
+
+        output_image = output_images[0]
+
+        # Generate a UUID for the output image
+        image_uuid = str(uuid.uuid4())
+
+        # Create the output filename with the UUID
+        output_filename = f"{image_uuid}.png"
+        output_path = os.path.join(OUTPUT_FOLDER, output_filename)
+
+        # Save the output image
+        output_image.save(output_path, format='PNG')
+
+        # Add the UUID and path to the image_results map
+        with image_results_lock:
+            image_results[image_uuid] = output_path
+
+        sse.publish({"message": "Processing complete", "uuid": image_uuid}, type='complete', channel=session_id)
+
+        return image_uuid
+
+    except Exception as e:
+        sse.publish({"error": str(e)}, type='error', channel=session_id)
+        return print(f"panic in process_image: {str(e)}")
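Note that the task above uses create_progress_callback, process_dc, OUTPUT_FOLDER, image_results and image_results_lock, whose definitions are either commented out or absent from this commit (app2.py is not part of the diff). A sketch of the missing glue, under those assumptions:

# Hypothetical definitions celery_worker.py appears to rely on; none of these ship in this commit.
import threading
from app2 import sse, process_dc   # assumption: app2.py mirrors app.py and exposes both names

OUTPUT_FOLDER = 'temp_images'      # placeholder output directory
image_results = {}                 # image_uuid -> output path
image_results_lock = threading.Lock()

def create_progress_callback(session_id):
    # Publish per-step progress on the caller's SSE channel (mirrors the commented block above).
    def progress_callback(step, total_steps):
        progress = int((step + 1) / total_steps * 100)
        sse.publish({"progress": progress}, type='progress', channel=session_id)
    return progress_callback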
ootd/inference_ootd_dc.py CHANGED
@@ -99,6 +99,9 @@ class OOTDiffusionDC:
                 num_steps=20,
                 image_scale=1.0,
                 seed=-1,
+
+                progress_callback=None,
+                progress_interval=5
     ):
         if seed == -1:
             random.seed(time.time())
@@ -128,6 +131,9 @@
             image_guidance_scale=image_scale,
             num_images_per_prompt=num_samples,
             generator=generator,
+
+            progress_callback=progress_callback,
+            progress_interval=progress_interval,
         ).images
 
         return images
ootd/pipelines_ootd/pipeline_ootd.py CHANGED
@@ -167,6 +167,12 @@ class OotdPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMix
         return_dict: bool = True,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+
+        progress_callback: Optional[Callable[[int, int], None]] = None,
+        progress_interval=5,
+
+
+
         **kwargs,
     ):
         r"""
@@ -362,83 +368,88 @@ class OotdPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMix
             return_dict=False,
         )
 
-        with self.progress_bar(total=num_inference_steps) as progress_bar:
-            for i, t in enumerate(timesteps):
-                latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
-
-                # concat latents, image_latents in the channel dimension
-                scaled_latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
-                latent_vton_model_input = torch.cat([scaled_latent_model_input, vton_latents], dim=1)
-                # latent_vton_model_input = scaled_latent_model_input + vton_latents
-
-                spatial_attn_inputs = spatial_attn_outputs.copy()
-
-                # predict the noise residual
-                noise_pred = self.unet_vton(
-                    latent_vton_model_input,
-                    spatial_attn_inputs,
-                    t,
-                    encoder_hidden_states=prompt_embeds,
-                    return_dict=False,
-                )[0]
-
-                # Hack:
-                # For karras style schedulers the model does classifer free guidance using the
-                # predicted_original_sample instead of the noise_pred. So we need to compute the
-                # predicted_original_sample here if we are using a karras style scheduler.
-                if scheduler_is_in_sigma_space:
-                    step_index = (self.scheduler.timesteps == t).nonzero()[0].item()
-                    sigma = self.scheduler.sigmas[step_index]
-                    noise_pred = latent_model_input - sigma * noise_pred
-
-                # perform guidance
-                if self.do_classifier_free_guidance:
-                    noise_pred_text_image, noise_pred_text = noise_pred.chunk(2)
-                    noise_pred = (
-                        noise_pred_text
-                        + self.image_guidance_scale * (noise_pred_text_image - noise_pred_text)
-                    )
-
-                # Hack:
-                # For karras style schedulers the model does classifer free guidance using the
-                # predicted_original_sample instead of the noise_pred. But the scheduler.step function
-                # expects the noise_pred and computes the predicted_original_sample internally. So we
-                # need to overwrite the noise_pred here such that the value of the computed
-                # predicted_original_sample is correct.
-                if scheduler_is_in_sigma_space:
-                    noise_pred = (noise_pred - latents) / (-sigma)
-
-                # compute the previous noisy sample x_t -> x_t-1
-                latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
-
-                init_latents_proper = image_ori_latents * self.vae.config.scaling_factor
-
-                # repainting
-                if i < len(timesteps) - 1:
-                    noise_timestep = timesteps[i + 1]
-                    init_latents_proper = self.scheduler.add_noise(
-                        init_latents_proper, noise, torch.tensor([noise_timestep])
-                    )
-
-                latents = (1 - mask_latents) * init_latents_proper + mask_latents * latents
-
-                if callback_on_step_end is not None:
-                    callback_kwargs = {}
-                    for k in callback_on_step_end_tensor_inputs:
-                        callback_kwargs[k] = locals()[k]
-                    callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
-
-                    latents = callback_outputs.pop("latents", latents)
-                    prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
-                    negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
-                    vton_latents = callback_outputs.pop("vton_latents", vton_latents)
-
-                # call the callback, if provided
-                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
-                    progress_bar.update()
-                    if callback is not None and i % callback_steps == 0:
-                        step_idx = i // getattr(self.scheduler, "order", 1)
-                        callback(step_idx, t, latents)
+        # with self.progress_bar(total=num_inference_steps) as progress_bar:
+        for i, t in enumerate(timesteps):
+            latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
+
+            # concat latents, image_latents in the channel dimension
+            scaled_latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+            latent_vton_model_input = torch.cat([scaled_latent_model_input, vton_latents], dim=1)
+            # latent_vton_model_input = scaled_latent_model_input + vton_latents
+
+            spatial_attn_inputs = spatial_attn_outputs.copy()
+
+            # predict the noise residual
+            noise_pred = self.unet_vton(
+                latent_vton_model_input,
+                spatial_attn_inputs,
+                t,
+                encoder_hidden_states=prompt_embeds,
+                return_dict=False,
+            )[0]
+
+            # Hack:
+            # For karras style schedulers the model does classifer free guidance using the
+            # predicted_original_sample instead of the noise_pred. So we need to compute the
+            # predicted_original_sample here if we are using a karras style scheduler.
+            if scheduler_is_in_sigma_space:
+                step_index = (self.scheduler.timesteps == t).nonzero()[0].item()
+                sigma = self.scheduler.sigmas[step_index]
+                noise_pred = latent_model_input - sigma * noise_pred
+
+            # perform guidance
+            if self.do_classifier_free_guidance:
+                noise_pred_text_image, noise_pred_text = noise_pred.chunk(2)
+                noise_pred = (
+                    noise_pred_text
+                    + self.image_guidance_scale * (noise_pred_text_image - noise_pred_text)
+                )
+
+            # Hack:
+            # For karras style schedulers the model does classifer free guidance using the
+            # predicted_original_sample instead of the noise_pred. But the scheduler.step function
+            # expects the noise_pred and computes the predicted_original_sample internally. So we
+            # need to overwrite the noise_pred here such that the value of the computed
+            # predicted_original_sample is correct.
+            if scheduler_is_in_sigma_space:
+                noise_pred = (noise_pred - latents) / (-sigma)
+
+            # compute the previous noisy sample x_t -> x_t-1
+            latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+
+            init_latents_proper = image_ori_latents * self.vae.config.scaling_factor
+
+            # repainting
+            if i < len(timesteps) - 1:
+                noise_timestep = timesteps[i + 1]
+                init_latents_proper = self.scheduler.add_noise(
+                    init_latents_proper, noise, torch.tensor([noise_timestep])
+                )
+
+            latents = (1 - mask_latents) * init_latents_proper + mask_latents * latents
+
+            if callback_on_step_end is not None:
+                callback_kwargs = {}
+                for k in callback_on_step_end_tensor_inputs:
+                    callback_kwargs[k] = locals()[k]
+                callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
+
+                latents = callback_outputs.pop("latents", latents)
+                prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
+                negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+                vton_latents = callback_outputs.pop("vton_latents", vton_latents)
+
+
+            if progress_callback is not None and i % progress_interval == 0:
+                progress_callback(i, num_inference_steps)
+
+
+            # call the callback, if provided
+            if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+                # progress_bar.update()
+                if callback is not None and i % callback_steps == 0:
+                    step_idx = i // getattr(self.scheduler, "order", 1)
+                    callback(step_idx, t, latents)
 
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
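The contract added here is small: every progress_interval steps the pipeline calls progress_callback(i, num_inference_steps). A minimal sketch of a consumer, using the same keyword names the OOTDiffusionDC wrapper now forwards (the commented call mirrors the one in app.py; this is an illustration only):

# Sketch: any callable taking (step, total_steps) can observe denoising progress.
def log_progress(step, total_steps):
    if total_steps:
        print(f"step {step + 1}/{total_steps} ({int((step + 1) / total_steps * 100)}%)")

# images = ootd_model_dc(
#     model_type='dc', category='dresses',
#     image_garm=garm_img, image_vton=masked_vton_img, mask=mask, image_ori=vton_img,
#     num_samples=1, num_steps=10, image_scale=2.0, seed=42,
#     progress_callback=log_progress, progress_interval=1,
# )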
preprocess/humanparsing/parsing_api.py CHANGED
@@ -16,6 +16,66 @@ from tqdm import tqdm
 from PIL import Image
 
 
+def colorize_parsing(parsing_result):
+    label_map = {
+        0: "background", 1: "hat", 2: "hair", 3: "sunglasses", 4: "upper_clothes",
+        5: "skirt", 6: "pants", 7: "dress", 8: "belt", 9: "left_shoe",
+        10: "right_shoe", 11: "head", 12: "left_leg", 13: "right_leg",
+        14: "left_arm", 15: "right_arm", 16: "bag", 17: "scarf"
+    }
+
+    # Define colors for each part (RGB)
+    color_map = {
+        0: (0, 0, 0),        # Background
+        1: (128, 0, 0),      # Hat
+        2: (255, 0, 0),      # Hair
+        3: (0, 255, 0),      # Sunglasses
+        4: (0, 0, 255),      # Upper-clothes
+        5: (255, 255, 0),    # Skirt
+        6: (255, 0, 255),    # Pants
+        7: (0, 255, 255),    # Dress
+        8: (128, 128, 0),    # Belt
+        9: (0, 128, 128),    # Left-shoe
+        10: (128, 0, 128),   # Right-shoe
+        11: (128, 128, 128), # Head
+        12: (64, 0, 0),      # Left-leg
+        13: (192, 0, 0),     # Right-leg
+        14: (64, 128, 0),    # Left-arm
+        15: (192, 128, 0),   # Right-arm
+        16: (64, 0, 128),    # Bag
+        17: (192, 0, 128),   # Scarf
+    }
+
+    height, width = parsing_result.shape
+    colored_parsing = np.zeros((height, width, 3), dtype=np.uint8)
+
+    for label, color in color_map.items():
+        colored_parsing[parsing_result == label] = color
+
+    return colored_parsing
+
+def add_numbers_to_image(colored_parsing, parsing_result):
+    label_map = {
+        0: "background", 1: "hat", 2: "hair", 3: "sunglasses", 4: "upper_clothes",
+        5: "skirt", 6: "pants", 7: "dress", 8: "belt", 9: "left_shoe",
+        10: "right_shoe", 11: "head", 12: "left_leg", 13: "right_leg",
+        14: "left_arm", 15: "right_arm", 16: "bag", 17: "scarf"
+    }
+
+    height, width = parsing_result.shape
+    numbered_image = colored_parsing.copy()
+
+    for label in range(18):  # 0 to 17
+        mask = (parsing_result == label)
+        if np.any(mask):
+            y, x = np.where(mask)
+            center_y, center_x = int(np.mean(y)), int(np.mean(x))
+
+            cv2.putText(numbered_image, str(label), (center_x, center_y),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
+
+    return numbered_image
+
 def get_palette(num_cls):
     """ Returns the color map for visualizing the segmentation mask.
     Args:
@@ -182,6 +242,16 @@ def onnx_inference(session, lip_session, input_dir):
     output_img.putpalette(palette)
     face_mask = torch.from_numpy((parsing_result == 11).astype(np.float32))
 
+    # Colorize the parsing result
+    colored_parsing = colorize_parsing(parsing_result)
+
+    # Add numbers to the colorized image
+    numbered_parsing = add_numbers_to_image(colored_parsing, parsing_result)
+
+    # Save the numbered parsing result
+    output_filename = "colored_parsing.png"
+    cv2.imwrite(output_filename, cv2.cvtColor(numbered_parsing, cv2.COLOR_RGB2BGR))
+
     return output_img, face_mask
 

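The two helpers are debug utilities: colorize_parsing maps the 18 parsing labels to fixed RGB colors, and add_numbers_to_image draws each label id at the centroid of its region. A standalone usage sketch with a synthetic parsing map (the array shape and label value are made up for illustration):

import cv2
import numpy as np
from preprocess.humanparsing.parsing_api import colorize_parsing, add_numbers_to_image

parsing_result = np.zeros((512, 384), dtype=np.uint8)  # fake label map
parsing_result[120:400, 100:280] = 7                   # pretend this region is "dress"

colored = colorize_parsing(parsing_result)             # (H, W, 3) uint8 RGB image
numbered = add_numbers_to_image(colored, parsing_result)
cv2.imwrite("colored_parsing_demo.png", cv2.cvtColor(numbered, cv2.COLOR_RGB2BGR))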
preprocess/openpose/run_openpose.py CHANGED
@@ -80,8 +80,8 @@ class OpenPose:
         # output_image = cv2.resize(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB), (768, 1024))
         # cv2.imwrite('/home/aigc/ProjectVTON/OpenPose/keypoints/out_pose.jpg', output_image)
 
-        # return keypoints
-        return keypoints, candidate, subset
+        return keypoints
+        # return keypoints, candidate, subset
 
 
 if __name__ == '__main__':
requirements.txt CHANGED
@@ -16,4 +16,6 @@ config==0.5.1
 einops==0.7.0
 onnxruntime==1.16.2
 basicsr
-onnxruntime-gpu==1.18.0
+onnxruntime-gpu==1.18.0
+Flask
+Flask-Cors
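Note that the new code also imports flask_sse (app.py) and celery (celery_worker.py), and flask_sse expects the Redis instance configured via REDIS_URL, while only Flask and Flask-Cors are pinned here. A sketch of the extra entries this commit appears to assume (package names as published on PyPI, versions left open on purpose; not part of the commit):

flask-sse
celery
redis
gunicorn  # referenced by the commented launch command in app.py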
run/cloths_db.py CHANGED
@@ -62,8 +62,8 @@ cloths_map= {
 modeL_db= {
     '051962_0.jpg': "no-dress",
     '052472_0.jpg': "dress",
-    '02783_00.jpg': 0,
-    '09933_00.jpg': 0,
+    '02783_00.jpg': "dress",
+    '09933_00.jpg': "dress",
     '053228_0.jpg': "dress",
     '051482_0.jpg': "no-dress",
     'model_1.png': 0,
run/gradio_ootd.py CHANGED
@@ -3,7 +3,7 @@ import os
 from pathlib import Path
 import sys
 import torch
-from PIL import Image, ImageOps
+from PIL import Image, ImageOps , ImageDraw
 import numpy as np
 from utils_ootd import get_mask_location
 from cloths_db import cloths_map, modeL_db
@@ -20,11 +20,12 @@ from preprocess.openpose.annotator.openpose.util import draw_bodypose
 # torch.set_default_dtype(torch.float16)
 
 
-openpose_model_hd = OpenPose(0)
+openpose_model = OpenPose(0)
+
 parsing_model_hd = Parsing(0)
 ootd_model_hd = OOTDiffusionHD(0)
 
-openpose_model_dc = OpenPose(0)
+
 parsing_model_dc = Parsing(0)
 ootd_model_dc = OOTDiffusionDC(0)
 
@@ -41,11 +42,21 @@ garment_hd = os.path.join(example_path, 'garment/03244_00.jpg')
 model_dc = os.path.join(example_path, 'model/model_8.png')
 garment_dc = os.path.join(example_path, 'garment/048554_1.jpg')
 
-openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
+openpose_model.preprocessor.body_estimation.model.to('cuda')
+#model dc
+
 ootd_model_dc.pipe.to('cuda')
 ootd_model_dc.image_encoder.to('cuda')
 ootd_model_dc.text_encoder.to('cuda')
 
+
+#model hd
+
+# ootd_model_hd.pipe.to('cuda')
+# ootd_model_hd.image_encoder.to('cuda')
+# ootd_model_hd.text_encoder.to('cuda')
+
+
 def convert_to_image(image_array):
     if isinstance(image_array, np.ndarray):
         # Normalize the data to the range [0, 255]
@@ -69,14 +80,24 @@ def process_hd(vton_img, garm_img, n_samples, n_steps, image_scale, seed):
     category = 0 # 0:upperbody; 1:lowerbody; 2:dress
 
     with torch.no_grad():
-        openpose_model_hd.preprocessor.body_estimation.model.to('cuda')
-        ootd_model_hd.pipe.to('cuda')
-        ootd_model_hd.image_encoder.to('cuda')
-        ootd_model_hd.text_encoder.to('cuda')
+        # openpose_model_hd.preprocessor.body_estimation.model.to('cuda')
+        # ootd_model_hd.pipe.to('cuda')
+        # ootd_model_hd.image_encoder.to('cuda')
+        # ootd_model_hd.text_encoder.to('cuda')
 
+        # garm_img = Image.open(garm_img).resize((768, 1024))
+        # vton_img = Image.open(vton_img).resize((768, 1024))
+        # keypoints = openpose_model(vton_img.resize((384, 512)))
+
         garm_img = Image.open(garm_img).resize((768, 1024))
         vton_img = Image.open(vton_img).resize((768, 1024))
-        keypoints = openpose_model_hd(vton_img.resize((384, 512)))
+        keypoints ,candidate , subset = openpose_model(vton_img.resize((384, 512)))
+
+        print(len(keypoints["pose_keypoints_2d"]))
+        print(keypoints["pose_keypoints_2d"])
+
+
+
         model_parse, _ = parsing_model_hd(vton_img.resize((384, 512)))
 
         mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
@@ -94,123 +115,188 @@ def process_hd(vton_img, garm_img, n_samples, n_steps, image_scale, seed):
             image_ori=vton_img,
             num_samples=n_samples,
             num_steps=n_steps,
-            image_scale=image_scale,
-            seed=seed,
+            image_scale=2.0,
+            seed=42,
         )
 
     return images
 
+def create_bw_mask(size):
+    width, height = size
+    mask = Image.new('L', (width, height))
+    draw = ImageDraw.Draw(mask)
+    draw.rectangle([0, 0, width, height // 2], fill=255)  # top half white
+    draw.rectangle([0, height // 2, width, height], fill=0)  # bottom half black
+    return mask
 
+def create_mask(vton_img, garm_img, category):
+
+    model_type = 'dc'
+    if category == 'Upper-body':
+        category = 0
+    elif category == 'Lower-body':
+        category = 1
+    else:
+        category =2
+
+    with torch.no_grad():
+        # openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
+        # ootd_model_dc.pipe.to('cuda')
+        # ootd_model_dc.image_encoder.to('cuda')
+        # ootd_model_dc.text_encoder.to('cuda')
+
+        garm_img = Image.open(garm_img).resize((768, 1024))
+        vton_img = Image.open(vton_img).resize((768, 1024))
+        keypoints = openpose_model(vton_img.resize((384, 512)))
 
-# @spaces.GPU
-def process_dc(vton_img, garm_img, category):
-    model_type = 'dc'
-    if category == 'Upper-body':
-        category = 0
-    elif category == 'Lower-body':
-        category = 1
-    else:
-        category =2
+        print(len(keypoints["pose_keypoints_2d"]))
+        print(keypoints["pose_keypoints_2d"])
 
-    with torch.no_grad():
-        # openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
-        # ootd_model_dc.pipe.to('cuda')
-        # ootd_model_dc.image_encoder.to('cuda')
-        # ootd_model_dc.text_encoder.to('cuda')
-
-        garm_img = Image.open(garm_img).resize((768, 1024))
-        vton_img = Image.open(vton_img).resize((768, 1024))
-        keypoints ,candidate , subset = openpose_model_dc(vton_img.resize((384, 512)))
+        # person_image = np.asarray(vton_img)
 
-        # print(len(keypoints["pose_keypoints_2d"]))
-        # print(keypoints["pose_keypoints_2d"])
 
-        # person_image = np.asarray(vton_img)
+        # print(len(person_image))
+
 
+        # person_image = np.asarray(Image.open(vton_img).resize((768, 1024)))
 
-        # print(len(person_image))
-
+        # output = draw_bodypose(canvas=person_image,candidate=candidate, subset=subset )
+        # output_image = Image.fromarray(output)
+        # output_image.save('keypose.png')
 
-        # person_image = np.asarray(Image.open(vton_img).resize((768, 1024)))
 
-        # output = draw_bodypose(canvas=person_image,candidate=candidate, subset=subset )
-        # output_image = Image.fromarray(output)
-        # output_image.save('keypose.png')
 
+        left_point = keypoints["pose_keypoints_2d"][2]
+        right_point = keypoints["pose_keypoints_2d"][5]
 
+        neck_point = keypoints["pose_keypoints_2d"][1]
+        hip_point = keypoints["pose_keypoints_2d"][8]
 
-        left_point = keypoints["pose_keypoints_2d"][2]
-        right_point = keypoints["pose_keypoints_2d"][5]
 
-        neck_point = keypoints["pose_keypoints_2d"][1]
-        hip_point = keypoints["pose_keypoints_2d"][8]
 
+        print(f'left shoulder - {left_point}')
+        print(f'right shoulder - {right_point}')
+
+        # #find disctance using Euclidian distance
+        shoulder_width_pixels = round(np.sqrt( np.power((right_point[0]-left_point[0]),2) + np.power((right_point[1]-left_point[1]),2)),2)
 
+        height_pixels = round(np.sqrt( np.power((neck_point[0]-hip_point[0]),2) + np.power((neck_point[1]-hip_point[1]),2)),2) *2
 
-        print(f'left shoulder - {left_point}')
-        print(f'right shoulder - {right_point}')
-
-        # #find disctance using Euclidian distance
-        shoulder_width_pixels = round(np.sqrt( np.power((right_point[0]-left_point[0]),2) + np.power((right_point[1]-left_point[1]),2)),2)
 
-        height_pixels = round(np.sqrt( np.power((neck_point[0]-hip_point[0]),2) + np.power((neck_point[1]-hip_point[1]),2)),2) *2
+        # # Assuming an average human height
+        average_height_cm = 172.72 *1.5
 
+        # Conversion factor from pixels to cm
+        conversion_factor = average_height_cm / height_pixels
 
-        # # Assuming an average human height
-        average_height_cm = 172.72 *1.5
+        # Convert shoulder width to real-world units
+        shoulder_width_cm = shoulder_width_pixels * conversion_factor
 
-        # Conversion factor from pixels to cm
-        conversion_factor = average_height_cm / height_pixels
+        print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
+        print(f'Estimated height (in pixels): {height_pixels}')
+        print(f'Conversion factor (pixels to cm): {conversion_factor}')
+        print(f'Shoulder width (in cm): {shoulder_width_cm}')
+        print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
 
-        # Convert shoulder width to real-world units
-        shoulder_width_cm = shoulder_width_pixels * conversion_factor
+        model_parse, face_mask = parsing_model_dc(vton_img.resize((384, 512)))
 
-        print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
-        print(f'Estimated height (in pixels): {height_pixels}')
-        print(f'Conversion factor (pixels to cm): {conversion_factor}')
-        print(f'Shoulder width (in cm): {shoulder_width_cm}')
-        print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
+        model_parse_image = convert_to_image(model_parse)
+        face_mask_image = convert_to_image(face_mask)
 
-        model_parse, face_mask = parsing_model_dc(vton_img.resize((384, 512)))
+        # Save the images
+        model_parse_image.save('model_parse_image.png')
+        face_mask_image.save('face_mask_image.png')
 
-        model_parse_image = convert_to_image(model_parse)
-        face_mask_image = convert_to_image(face_mask)
+
 
-        # Save the images
-        model_parse_image.save('model_parse_image.png')
-        face_mask_image.save('face_mask_image.png')
+        mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
 
 
-        mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
+        # up_mask, up_mask_gray = get_mask_location(model_type, category_dict_utils[0], model_parse, keypoints)
+        # lo_mask, lo_mask_gray = get_mask_location(model_type, category_dict_utils[1], model_parse, keypoints)
 
-        # final_mask = convert_to_image(mask)
-        # final_mask.save("final_mask.png")
 
-        # final_mask_grat = convert_to_image(mask_gray)
-        # final_mask_grat.save("final_mask_grat.png")
 
-        mask = mask.resize((768, 1024), Image.NEAREST)
-        mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
-        # Save the resized masks
-        mask.save("mask_resized.png")
-        mask_gray.save("mask_gray_resized.png")
+        # mask = Image.composite(up_mask,lo_mask,up_mask)
+        # mask_gray = Image.composite(up_mask_gray, lo_mask_gray,up_mask)
+
+        mask = mask.resize((768, 1024), Image.NEAREST)
+        mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
+
+        # if modeL_db[vton_img] == 0:
+        # Create a black-and-white mask
+        bw_mask = create_bw_mask((768, 1024))
+        #crete empty black image with mode L
+        temp_img = Image.new("L", (768, 1024), 0)
+        mask = Image.composite(mask, temp_img, bw_mask)
+
+
+
+
+
+        # print(mask)
+        # Save the resized masks
+        mask.save("mask_resized.png")
+        mask_gray.save("mask_gray_resized.png")
+
+        return [mask, mask_gray], mask, mask_gray
+# @spaces.GPU
+def process_dc(vton_img, garm_img, category, mask,mask_gray):
+    model_type = 'dc'
+    if category == 'Upper-body':
+        category = 0
+    elif category == 'Lower-body':
+        category = 1
+    else:
+        category =2
+
+    # Extract the composite images from the edit data
+    edited_mask = mask['composite']
+    edited_mask_gray = mask_gray['composite']
+    # print(edited_mask)
+
+    garm_img = Image.open(garm_img).resize((768, 1024))
+    vton_img = Image.open(vton_img).resize((768, 1024))
+
+
+
+
+
+    # print(f'vton_img is {vton_img}')
+    with torch.no_grad():
+        # Ensure both masks are in 'L' mode (grayscale)
+        if edited_mask.mode != 'L':
+            edited_mask = edited_mask.convert('L')
+        if edited_mask_gray.mode != 'L':
+            edited_mask_gray = edited_mask_gray.convert('L')
+
 
-        masked_vton_img = Image.composite(mask_gray, vton_img, mask)
+        # Ensure all images and masks are the same size
+        edited_mask = edited_mask.resize((768, 1024), Image.NEAREST)
+        edited_mask_gray = edited_mask_gray.resize((768, 1024), Image.NEAREST)
+
+        print(f'mask: {edited_mask}')
+        print(f'vton_img: {vton_img}')
+
+
+        masked_vton_img = Image.composite(edited_mask_gray, vton_img, edited_mask)
         masked_vton_img.save("masked_vton_img.png")
 
+        print(f'category is {category}')
+
         images = ootd_model_dc(
             model_type=model_type,
             category=category_dict[category],
             image_garm=garm_img,
             image_vton=masked_vton_img,
-            mask=mask,
+            mask=edited_mask,
             image_ori=vton_img,
             num_samples=1,
             num_steps=10,
             image_scale= 2.0,
             seed=-1,
         )
-
+        # return None
         return images
 
 # is_upper = False
@@ -218,6 +304,8 @@ def process_dc(vton_img, garm_img, category):
 
 block = gr.Blocks().queue()
 with block:
+    mask_state = gr.State()
+    mask_gray_state = gr.State()
     with gr.Row():
         gr.Markdown("# ")
 
@@ -258,6 +346,7 @@ with block:
                 os.path.join(example_path, 'model/051918_0.jpg'),
                 os.path.join(example_path, 'model/051962_0.jpg'),
                 os.path.join(example_path, 'model/049205_0.jpg'),
+                os.path.join(example_path, 'model/05997_00.jpg'),
             ],
 
         )
@@ -273,7 +362,7 @@ with block:
                 os.path.join(example_path, 'model/052472_0.jpg'),
                 os.path.join(example_path, 'model/053514_0.jpg'),
                 os.path.join(example_path, 'model/053228_0.jpg'),
-                os.path.join(example_path, 'model/052964_0.jpg'),
+                os.path.join(example_path, 'model/06802_00.jpg'),
                 os.path.join(example_path, 'model/053700_0.jpg'),
             ],
 
@@ -348,9 +437,17 @@ with block:
                 os.path.join(garment_path, '053319_1.jpg'),
                 os.path.join(garment_path, '052234_1.jpg'),
             ])
-        with gr.Column():
-            result_gallery_dc = gr.Gallery(label='Output', show_label=False, elem_id="gallery", preview=True, scale=1)
        with gr.Column():
+            mask_gallery = gr.Gallery(label="Created Masks")
+
+            result_gallery_dc = gr.Gallery(label='Output', show_label=False, elem_id="gallery", preview=True, scale=1)
+        with gr.Row():
+            # Add ImageEditor for mask editing
+            mask_editor = gr.ImageEditor(label="Edit Mask", type="pil")
+            # Add ImageEditor for mask_gray editing
+            mask_gray_editor = gr.ImageEditor(label="Edit Mask Gray", type="pil")
+        with gr.Column():
+            create_mask_button = gr.Button(value="Create Mask")
            run_button_dc = gr.Button(value="Run")
            # n_samples_dc = gr.Slider(label="Images", minimum=1, maximum=4, value=1, step=1)
            # n_steps_dc = gr.Slider(label="Steps", minimum=20, maximum=40, value=20, step=1)
@@ -359,10 +456,33 @@ with block:
            # seed_dc = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, value=-1)
 
            # ips_dc = [vton_img_dc, garm_img_dc, category_dc]
+
 
 
            ips_dc = [vton_img_dc, garm_img_dc ,category_dc]
-            run_button_dc.click(fn=process_dc, inputs=ips_dc, outputs=[result_gallery_dc])
+
+            # create_mask_button.click(
+            #     fn=create_mask,
+            #     inputs=ips_dc,
+            #     outputs=[mask_gallery, mask_state, mask_gray_state]
+            # )
+            create_mask_button.click(
+                fn=create_mask,
+                inputs=ips_dc,
+                outputs=[mask_gallery, mask_editor, mask_gray_editor]
+            )
+
+            # run_button_dc.click(fn=process_dc, inputs=ips_dc, outputs=[result_gallery_dc])
+            # run_button_dc.click(
+            #     fn=process_dc,
+            #     inputs=ips_dc + [mask_state, mask_gray_state],
+            #     outputs=[result_gallery_dc])
+            run_button_dc.click(
+                fn=process_dc,
+                inputs=[vton_img_dc, garm_img_dc, category_dc, mask_editor, mask_gray_editor],
+                outputs=[result_gallery_dc]
+            )
+
 
 
 block.launch(server_name="0.0.0.0", server_port=7860 )
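For orientation: the demo is now a two-step flow. "Create Mask" runs create_mask and fills the gallery plus the two gr.ImageEditor components; "Run" feeds the (optionally hand-edited) editor values into process_dc, which only uses the flattened image. A small sketch of that read, assuming the stock Gradio 4.x ImageEditor value (a dict with "background", "layers" and "composite"):

def composite_as_mask(editor_value):
    # Mirrors `edited_mask = mask['composite']` in process_dc above.
    img = editor_value["composite"]
    return img if img.mode == "L" else img.convert("L")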
run/testing.py ADDED
@@ -0,0 +1,167 @@
+
+import os
+from pathlib import Path
+import sys
+import torch
+from PIL import Image, ImageOps
+import numpy as np
+from utils_ootd import get_mask_location
+from cloths_db import cloths_map, modeL_db
+PROJECT_ROOT = Path(__file__).absolute().parents[1].absolute()
+sys.path.insert(0, str(PROJECT_ROOT))
+
+from preprocess.openpose.run_openpose import OpenPose
+from preprocess.humanparsing.run_parsing import Parsing
+from ootd.inference_ootd_hd import OOTDiffusionHD
+from ootd.inference_ootd_dc import OOTDiffusionDC
+from preprocess.openpose.annotator.openpose.util import draw_bodypose
+
+
+
+openpose_model = OpenPose(0)
+
+
+
+parsing_model_dc = Parsing(0)
+ootd_model_dc = OOTDiffusionDC(0)
+
+category_dict = ['upperbody', 'lowerbody', 'dress']
+category_dict_utils = ['upper_body', 'lower_body', 'dresses']
+
+
+example_path = os.path.join(os.path.dirname(__file__), 'examples')
+garment_path = os.path.join(os.path.dirname(__file__), 'examples','garment')
+
+
+
+
+openpose_model.preprocessor.body_estimation.model.to('cuda')
+
+
+ootd_model_dc.pipe.to('cuda')
+ootd_model_dc.image_encoder.to('cuda')
+ootd_model_dc.text_encoder.to('cuda')
+
+def process_dc(vton_img, garm_img, category):
+    model_type = 'dc'
+    if category == 'Upper-body':
+        category = 0
+    elif category == 'Lower-body':
+        category = 1
+    else:
+        category =2
+
+    with torch.no_grad():
+        # openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
+        # ootd_model_dc.pipe.to('cuda')
+        # ootd_model_dc.image_encoder.to('cuda')
+        # ootd_model_dc.text_encoder.to('cuda')
+
+        garm_img = Image.open(garm_img).resize((768, 1024))
+        vton_img = Image.open(vton_img).resize((768, 1024))
+        keypoints = openpose_model(vton_img.resize((384, 512)))
+
+        print(len(keypoints["pose_keypoints_2d"]))
+        print(keypoints["pose_keypoints_2d"])
+
+        # person_image = np.asarray(vton_img)
+
+
+        # print(len(person_image))
+
+
+        # person_image = np.asarray(Image.open(vton_img).resize((768, 1024)))
+
+        # output = draw_bodypose(canvas=person_image,candidate=candidate, subset=subset )
+        # output_image = Image.fromarray(output)
+        # output_image.save('keypose.png')
+
+
+
+        left_point = keypoints["pose_keypoints_2d"][2]
+        right_point = keypoints["pose_keypoints_2d"][5]
+
+        neck_point = keypoints["pose_keypoints_2d"][1]
+        hip_point = keypoints["pose_keypoints_2d"][8]
+
+
+
+        print(f'left shoulder - {left_point}')
+        print(f'right shoulder - {right_point}')
+
+        # #find disctance using Euclidian distance
+        shoulder_width_pixels = round(np.sqrt( np.power((right_point[0]-left_point[0]),2) + np.power((right_point[1]-left_point[1]),2)),2)
+
+        height_pixels = round(np.sqrt( np.power((neck_point[0]-hip_point[0]),2) + np.power((neck_point[1]-hip_point[1]),2)),2) *2
+
+
+        # # Assuming an average human height
+        average_height_cm = 172.72 *1.5
+
+        # Conversion factor from pixels to cm
+        conversion_factor = average_height_cm / height_pixels
+
+        # Convert shoulder width to real-world units
+        shoulder_width_cm = shoulder_width_pixels * conversion_factor
+
+        print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
+        print(f'Estimated height (in pixels): {height_pixels}')
+        print(f'Conversion factor (pixels to cm): {conversion_factor}')
+        print(f'Shoulder width (in cm): {shoulder_width_cm}')
+        print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
+
+        model_parse, face_mask = parsing_model_dc(vton_img.resize((384, 512)))
+
+        # model_parse_image = convert_to_image(model_parse)
+        # face_mask_image = convert_to_image(face_mask)
+
+        # Save the images
+        # model_parse_image.save('model_parse_image.png')
+        # face_mask_image.save('face_mask_image.png')
+
+
+
+
+        mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
+
+
+        # up_mask, up_mask_gray = get_mask_location(model_type, category_dict_utils[0], model_parse, keypoints)
+        # lo_mask, lo_mask_gray = get_mask_location(model_type, category_dict_utils[1], model_parse, keypoints)
+
+
+        # mask = Image.composite(up_mask,lo_mask,up_mask)
+        # mask_gray = Image.composite(up_mask_gray, lo_mask_gray,up_mask)
+
+        mask = mask.resize((768, 1024), Image.NEAREST)
+        mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
+        # Save the resized masks
+        mask.save("mask_resized.png")
+        mask_gray.save("mask_gray_resized.png")
+
+        masked_vton_img = Image.composite(mask_gray, vton_img, mask)
+        masked_vton_img.save("masked_vton_img.png")
+
+        print(f'category is {category}')
+
+        # images = ootd_model_dc(
+        #     model_type=model_type,
+        #     category=category_dict[category],
+        #     image_garm=garm_img,
+        #     image_vton=masked_vton_img,
+        #     mask=mask,
+        #     image_ori=vton_img,
+        #     num_samples=1,
+        #     num_steps=10,
+        #     image_scale= 2.0,
+        #     seed=-1,
+        # )
+        # return None
        return None
+
+
+if __name__ == '__main__':
+    model_dc = os.path.join(example_path, 'model/model_8.png')
+    garment_dc = os.path.join(example_path, 'garment/048554_1.jpg')
+
+
+    print(process_dc(model_dc,garment_dc,0))
run/utils_ootd.py CHANGED
@@ -57,6 +57,8 @@ def get_mask_location(model_type, category, model_parse: Image.Image, keypoint:
     im_parse = model_parse.resize((width, height), Image.NEAREST)
     parse_array = np.array(im_parse)
 
+    # print(parse_array.shape)
+
     if model_type == 'hd':
         arm_width = 60
     elif model_type == 'dc':
@@ -81,9 +83,12 @@
     arms = arms_left + arms_right
 
     if category == 'dresses':
+        # parse_mask = (parse_array == 5).astype(np.float32)  # + \
+        #              #(parse_array == 6).astype(np.float32)
+
         parse_mask = (parse_array == 7).astype(np.float32) + \
                      (parse_array == 4).astype(np.float32) + \
-                     (parse_array == 5).astype(np.float32) + \
+                     (parse_array == 5).astype(np.float32) + \
                      (parse_array == 6).astype(np.float32)
 
         parser_mask_changeable += np.logical_and(parse_array, np.logical_not(parser_mask_fixed))