Ubuntu committed on
Commit
3bc69b8
•
1 Parent(s): 129f40a

improved inference time

Browse files
Files changed (38)
  1. .gitignore +21 -0
  2. app.py +242 -0
  3. app2.py +391 -0
  4. checkpoints/openpose/ckpts/body_pose_model.pth +3 -0
  5. debugging_setps.txt +47 -0
  6. nohup.out +286 -0
  7. ootd/inference_ootd.py +3 -0
  8. ootd/inference_ootd_dc.py +1 -0
  9. ootd/pipelines_ootd/__pycache__/attention_garm.cpython-310.pyc +0 -0
  10. ootd/pipelines_ootd/__pycache__/attention_vton.cpython-310.pyc +0 -0
  11. ootd/pipelines_ootd/__pycache__/pipeline_ootd.cpython-310.pyc +0 -0
  12. ootd/pipelines_ootd/__pycache__/transformer_garm_2d.cpython-310.pyc +0 -0
  13. ootd/pipelines_ootd/__pycache__/transformer_vton_2d.cpython-310.pyc +0 -0
  14. ootd/pipelines_ootd/__pycache__/unet_garm_2d_blocks.cpython-310.pyc +0 -0
  15. ootd/pipelines_ootd/__pycache__/unet_garm_2d_condition.cpython-310.pyc +0 -0
  16. ootd/pipelines_ootd/__pycache__/unet_vton_2d_blocks.cpython-310.pyc +0 -0
  17. ootd/pipelines_ootd/__pycache__/unet_vton_2d_condition.cpython-310.pyc +0 -0
  18. ootd/pipelines_ootd/pipeline_ootd.py +4 -0
  19. preprocess/humanparsing/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
  20. preprocess/humanparsing/datasets/__pycache__/simple_extractor_dataset.cpython-310.pyc +0 -0
  21. preprocess/humanparsing/run_parsing.py +14 -3
  22. preprocess/humanparsing/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  23. preprocess/humanparsing/utils/__pycache__/transforms.cpython-310.pyc +0 -0
  24. preprocess/openpose/annotator/__pycache__/util.cpython-310.pyc +0 -0
  25. preprocess/openpose/annotator/openpose/__pycache__/__init__.cpython-310.pyc +0 -0
  26. preprocess/openpose/annotator/openpose/__pycache__/body.cpython-310.pyc +0 -0
  27. preprocess/openpose/annotator/openpose/__pycache__/face.cpython-310.pyc +0 -0
  28. preprocess/openpose/annotator/openpose/__pycache__/hand.cpython-310.pyc +0 -0
  29. preprocess/openpose/annotator/openpose/__pycache__/model.cpython-310.pyc +0 -0
  30. preprocess/openpose/annotator/openpose/__pycache__/util.cpython-310.pyc +0 -0
  31. preprocess/openpose/model_8.png +0 -0
  32. preprocess/openpose/run_openpose.py +15 -5
  33. ps.txt +1 -0
  34. requirements.txt +2 -1
  35. run/cloths_db.py +133 -0
  36. run/gradio_ootd copy_backup.py +380 -0
  37. run/gradio_ootd.py +221 -142
  38. test_df.py +24 -0
.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ootd/__pycache__/
2
+ preprocess/humanparsing/__pycache__/
3
+ preprocess/openpose/__pycache__/
4
+ run/__pycache__/
5
+ face_mask_image.png
6
+ keypose.png
7
+ mask_gray_resized.png
8
+ mask_resized.png
9
+ masked_vton_img.png
10
+ model_parse_image.png
11
+ run/examples/model/male/femal_s_34_test.png
12
+ run/examples/model/male/femal_s_34.png
13
+ run/examples/model/male/female.png
14
+ run/examples/model/male/male_2.png
15
+ run/examples/model/male/male_2xl_47_5.png
16
+ run/examples/model/male/male_large_42.png
17
+ run/examples/model/male/male_medium_40.png
18
+ run/examples/model/male/male_side.png
19
+ run/examples/model/male/male_small_38.png
20
+ run/examples/model/male/male_small.png
21
+ run/examples/model/male/male_xl_45.png
app.py ADDED
@@ -0,0 +1,242 @@
1
+ from flask import Flask, request, jsonify,send_file, Response
2
+ from flask_cors import CORS
3
+ import logging
4
+ import gc
5
+ import os
6
+ from io import BytesIO
7
+ from pathlib import Path
8
+ import sys
9
+ import torch
10
+ from PIL import Image, ImageOps
11
+ import numpy as np
12
+ from run.utils_ootd import get_mask_location
13
+ from run.cloths_db import cloths_map, modeL_db
14
+
15
+ from preprocess.openpose.run_openpose import OpenPose
16
+ from preprocess.humanparsing.run_parsing import Parsing
17
+ from ootd.inference_ootd_dc import OOTDiffusionDC
18
+
19
+ PROJECT_ROOT = Path(__file__).absolute().parents[1].absolute()
20
+ sys.path.insert(0, str(PROJECT_ROOT))
21
+
22
+
23
+
24
+ # run the Python garbage collector and clear the NVIDIA CUDA memory cache
25
+ gc.collect()
26
+ torch.cuda.empty_cache()
27
+
28
+
29
+ # Setup Flask server
30
+ app = Flask(__name__)
31
+ CORS(app, origins="*") # Enable CORS for the entire app
32
+
33
+
34
+ logger = logging.getLogger()
35
+
36
+
37
+ openpose_model = OpenPose(0)
38
+ parsing_model_dc = Parsing(0)
39
+ ootd_model_dc = OOTDiffusionDC(0)
40
+
41
+ example_path = os.path.join(os.path.dirname(__file__), 'examples')
42
+ garment_path = os.path.join(os.path.dirname(__file__), 'examples','garment')
43
+
44
+ openpose_model.preprocessor.body_estimation.model.to('cuda')
45
+
46
+ ootd_model_dc.pipe.to('cuda')
47
+ ootd_model_dc.image_encoder.to('cuda')
48
+ ootd_model_dc.text_encoder.to('cuda')
49
+
50
+ category_dict = ['upperbody', 'lowerbody', 'dress']
51
+ category_dict_utils = ['upper_body', 'lower_body', 'dresses']
52
+
53
+
54
+
55
+ # Ensure this directory exists
56
+ UPLOAD_FOLDER = 'temp_images'
57
+ if not os.path.exists(UPLOAD_FOLDER):
58
+ os.makedirs(UPLOAD_FOLDER)
59
+
60
+
61
+ def process_dc(vton_img, garm_img, category):
62
+ model_type = 'dc'
63
+
64
+ # if category == 'Upper-body':
65
+ # category = 0
66
+ # elif category == 'Lower-body':
67
+ # category = 1
68
+ # else:
69
+ # category = 2
70
+
71
+ with torch.no_grad():
72
+ # openpose_model.preprocessor.body_estimation.model.to('cuda')
73
+ # ootd_model_dc.pipe.to('cuda')
74
+ # ootd_model_dc.image_encoder.to('cuda')
75
+ # ootd_model_dc.text_encoder.to('cuda')
76
+
77
+ garm_img = Image.open(garm_img).convert('RGB').resize((768, 1024))
78
+ vton_img = Image.open(vton_img).convert('RGB').resize((768, 1024))
79
+ keypoints = openpose_model(vton_img.resize((384, 512)))
80
+
81
+ print(len(keypoints["pose_keypoints_2d"]))
82
+ print(keypoints["pose_keypoints_2d"])
83
+
84
+
85
+
86
+ left_point = keypoints["pose_keypoints_2d"][2]
87
+ right_point = keypoints["pose_keypoints_2d"][5]
88
+
89
+ neck_point = keypoints["pose_keypoints_2d"][1]
90
+ hip_point = keypoints["pose_keypoints_2d"][8]
91
+
92
+
93
+
94
+ print(f'left shoulder - {left_point}')
95
+ print(f'right shoulder - {right_point}')
96
+
97
+ # find the shoulder distance using the Euclidean distance
98
+ shoulder_width_pixels = round(np.sqrt( np.power((right_point[0]-left_point[0]),2) + np.power((right_point[1]-left_point[1]),2)),2)
99
+
100
+ height_pixels = round(np.sqrt( np.power((neck_point[0]-hip_point[0]),2) + np.power((neck_point[1]-hip_point[1]),2)),2) *2
101
+
102
+
103
+ # # Assuming an average human height
104
+ average_height_cm = 172.72 *1.5
105
+
106
+ # Conversion factor from pixels to cm
107
+ conversion_factor = average_height_cm / height_pixels
108
+
109
+ # Convert shoulder width to real-world units
110
+ shoulder_width_cm = shoulder_width_pixels * conversion_factor
111
+
112
+ print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
113
+ print(f'Estimated height (in pixels): {height_pixels}')
114
+ print(f'Conversion factor (pixels to cm): {conversion_factor}')
115
+ print(f'Shoulder width (in cm): {shoulder_width_cm}')
116
+ print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
117
+
118
+
119
+ model_parse,_ = parsing_model_dc(vton_img.resize((384, 512)))
120
+
121
+
122
+ mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
123
+
124
+
125
+
126
+ mask = mask.resize((768, 1024), Image.NEAREST)
127
+ mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
128
+ # Save the resized masks
129
+ # mask.save("mask_resized.png")
130
+ # mask_gray.save("mask_gray_resized.png")
131
+
132
+ masked_vton_img = Image.composite(mask_gray, vton_img, mask)
133
+ # masked_vton_img.save("masked_vton_img.png")
134
+
135
+ print(f'category is {category}')
136
+
137
+ images = ootd_model_dc(
138
+ model_type=model_type,
139
+ category=category_dict[category],
140
+ image_garm=garm_img,
141
+ image_vton=masked_vton_img,
142
+ mask=mask,
143
+ image_ori=vton_img,
144
+ num_samples=1,
145
+ num_steps=10,
146
+ image_scale= 1.0,
147
+ seed=-1,
148
+ )
149
+
150
+ return images
151
+
152
+
153
+
154
+ @app.route('/')
155
+ def root():
156
+ try:
157
+ response_data = {"message": "This is VTR API v1.0"}
158
+ return jsonify(response_data)
159
+ except Exception as e:
160
+ logger.error(f"Root endpoint error: {str(e)}")
161
+ response_data = {"message": "Internal server Error"}
162
+ return jsonify(response_data), 500
163
+
164
+
165
+
166
+ #write Flask api name "generate" with POST method that will input 2 images and return 1 image
167
+ @app.route('/generate', methods=['POST'])
168
+ def generate():
169
+ """
170
+ A Flask route that handles a POST request to the '/generate' endpoint.
171
+ It expects two files, 'garm_img' and 'vton_img', to be included in the request.
172
+ The function calls the 'process_dc' function with the provided files and the
173
+ category 'Upper-body'. It then sends the processed image as a file with the
174
+ mimetype 'image/png' and returns it to the client. If any exception occurs,
175
+ the function logs the error and returns a JSON response with a status code of
176
+ 500.
177
+
178
+ Parameters:
179
+ None
180
+
181
+ Returns:
182
+ A Flask response object with the processed image as a file.
183
+
184
+ Raises:
185
+ None
186
+ """
187
+
188
+ # if category == 'Upper-body':
189
+ # category = 0
190
+ # elif category == 'Lower-body':
191
+ # category = 1
192
+ # else:
193
+ # category = 2
194
+
195
+ try:
196
+ garm_img = request.files['garm_img']
197
+ vton_img = request.files['vton_img']
198
+ category = 0 # Default to Upper-body if not specified
199
+
200
+ # Save the uploaded files
201
+ garm_path = os.path.join(UPLOAD_FOLDER, 'garm_input.png')
202
+ vton_path = os.path.join(UPLOAD_FOLDER, 'vton_input.png')
203
+
204
+ garm_img.save(garm_path)
205
+ vton_img.save(vton_path)
206
+
207
+ # Convert file objects to bytes IO objects
208
+ # garm_img = BytesIO(garm_img.read())
209
+ # vton_img = BytesIO(vton_img.read())
210
+
211
+ output_images = process_dc(garm_img=garm_img,
212
+ vton_img=vton_img,
213
+ category=category)
214
+
215
+ if not output_images:
216
+ return Response("No output image generated", status=500)
217
+
218
+ output_image = output_images[0] # Get the first image
219
+
220
+ # Convert PIL Image to bytes
221
+ img_byte_arr = BytesIO()
222
+ output_image.save(img_byte_arr, format='PNG')
223
+ img_byte_arr = img_byte_arr.getvalue()
224
+
225
+ return Response(img_byte_arr, mimetype='image/png')
226
+
227
+ except Exception as e:
228
+ print(f"Error: {str(e)}") # Log the error
229
+ return Response(str(e), status=500)
230
+
231
+
232
+
233
+
234
+
235
+
236
+ if __name__ == '__main__':
237
+ app.run(debug=False, host='0.0.0.0', port=5009)
238
+
239
+
240
+
241
+
242
+ # nohup gunicorn -b 0.0.0.0:5003 sentiment_api:app &
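For reference, a minimal client sketch for the synchronous /generate endpoint defined above; the endpoint, field names and port come from app.py, while the file names are illustrative:

import requests

# Post a garment image and a model image; the server responds with the try-on PNG.
with open("garment.png", "rb") as g, open("person.png", "rb") as p:
    r = requests.post(
        "http://localhost:5009/generate",
        files={"garm_img": g, "vton_img": p},
        timeout=300,  # a single request currently takes on the order of 12-19 s
    )
r.raise_for_status()
with open("tryon_result.png", "wb") as f:
    f.write(r.content)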
app2.py ADDED
@@ -0,0 +1,391 @@
1
+ from flask import Flask, request, jsonify,send_file, Response
2
+ from flask_cors import CORS
3
+ import logging
4
+ import gc
5
+ import os
6
+ from threading import Thread
7
+ from flask_sse import sse
8
+ import uuid
9
+ import redis
10
+ import multiprocessing
11
+ from werkzeug.exceptions import NotFound, InternalServerError
12
+ import threading
13
+ from collections import OrderedDict
14
+
15
+ from flask import current_app
16
+
17
+ import time
18
+ from celery import Celery
19
+ from io import BytesIO
20
+ from pathlib import Path
21
+ import sys
22
+ import torch
23
+ from PIL import Image, ImageOps
24
+ import numpy as np
25
+ from run.utils_ootd import get_mask_location
26
+ from run.cloths_db import cloths_map, modeL_db
27
+
28
+ from preprocess.openpose.run_openpose import OpenPose
29
+ from preprocess.humanparsing.run_parsing import Parsing
30
+ from ootd.inference_ootd_dc import OOTDiffusionDC
31
+
32
+ PROJECT_ROOT = Path(__file__).absolute().parents[1].absolute()
33
+ sys.path.insert(0, str(PROJECT_ROOT))
34
+
35
+ from queue import Queue
36
+
37
+ from celery_worker import process_image
38
+
39
+
40
+ # run the Python garbage collector and clear the NVIDIA CUDA memory cache
41
+ gc.collect()
42
+ torch.cuda.empty_cache()
43
+
44
+ # Set the start method to 'spawn'
45
+ # multiprocessing.set_start_method('spawn', force=True)
46
+
47
+ # Setup Flask server
48
+
49
+
50
+
51
+
52
+ app = Flask(__name__)
53
+ app.config.update(
54
+ CELERY_BROKER_URL='redis://localhost:6379',
55
+ CELERY_RESULT_BACKEND='redis://localhost:6379'
56
+ )
57
+
58
+ # Initialize Celery
59
+ celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL'])
60
+ celery.conf.update(app.config)
61
+ logger = logging.getLogger()
62
+
63
+
64
+ openpose_model = OpenPose(0)
65
+ parsing_model_dc = Parsing(0)
66
+ ootd_model_dc = OOTDiffusionDC(0)
67
+
68
+ example_path = os.path.join(os.path.dirname(__file__), 'examples')
69
+ garment_path = os.path.join(os.path.dirname(__file__), 'examples','garment')
70
+
71
+ openpose_model.preprocessor.body_estimation.model.to('cuda')
72
+
73
+ ootd_model_dc.pipe.to('cuda')
74
+ ootd_model_dc.image_encoder.to('cuda')
75
+ ootd_model_dc.text_encoder.to('cuda')
76
+
77
+ category_dict = ['upperbody', 'lowerbody', 'dress']
78
+ category_dict_utils = ['upper_body', 'lower_body', 'dresses']
79
+
80
+
81
+
82
+ # Ensure this directory exists
83
+ UPLOAD_FOLDER = 'temp_images'
84
+ if not os.path.exists(UPLOAD_FOLDER):
85
+ os.makedirs(UPLOAD_FOLDER)
86
+
87
+
88
+ OUTPUT_FOLDER = 'path/to/output/folder'
89
+ image_results = {}
90
+ image_results_lock = threading.Lock()
91
+
92
+
93
+ # Use an OrderedDict to limit the number of stored results
94
+ image_results = OrderedDict()
95
+ MAX_RESULTS = 100 # Adjust this value based on your needs
96
+
97
+ def process_dc(vton_img, garm_img, category,progress_callback):
98
+ model_type = 'dc'
99
+
100
+ if category == 'Upper-body':
101
+ category = 0
102
+ elif category == 'Lower-body':
103
+ category = 1
104
+ else:
105
+ category = 2
106
+
107
+ with torch.no_grad():
108
+ # openpose_model.preprocessor.body_estimation.model.to('cuda')
109
+ # ootd_model_dc.pipe.to('cuda')
110
+ # ootd_model_dc.image_encoder.to('cuda')
111
+ # ootd_model_dc.text_encoder.to('cuda')
112
+
113
+ garm_img = Image.open(garm_img).resize((768, 1024))
114
+ vton_img = Image.open(vton_img).resize((768, 1024))
115
+ keypoints = openpose_model(vton_img.resize((384, 512)))
116
+
117
+ print(len(keypoints["pose_keypoints_2d"]))
118
+ print(keypoints["pose_keypoints_2d"])
119
+
120
+
121
+
122
+ left_point = keypoints["pose_keypoints_2d"][2]
123
+ right_point = keypoints["pose_keypoints_2d"][5]
124
+
125
+ neck_point = keypoints["pose_keypoints_2d"][1]
126
+ hip_point = keypoints["pose_keypoints_2d"][8]
127
+
128
+
129
+
130
+ print(f'left shoulder - {left_point}')
131
+ print(f'right shoulder - {right_point}')
132
+
133
+ # find the shoulder distance using the Euclidean distance
134
+ shoulder_width_pixels = round(np.sqrt( np.power((right_point[0]-left_point[0]),2) + np.power((right_point[1]-left_point[1]),2)),2)
135
+
136
+ height_pixels = round(np.sqrt( np.power((neck_point[0]-hip_point[0]),2) + np.power((neck_point[1]-hip_point[1]),2)),2) *2
137
+
138
+
139
+ # # Assuming an average human height
140
+ average_height_cm = 172.72 *1.5
141
+
142
+ # Conversion factor from pixels to cm
143
+ conversion_factor = average_height_cm / height_pixels
144
+
145
+ # Convert shoulder width to real-world units
146
+ shoulder_width_cm = shoulder_width_pixels * conversion_factor
147
+
148
+ print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
149
+ print(f'Estimated height (in pixels): {height_pixels}')
150
+ print(f'Conversion factor (pixels to cm): {conversion_factor}')
151
+ print(f'Shoulder width (in cm): {shoulder_width_cm}')
152
+ print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
153
+
154
+
155
+ model_parse,_ = parsing_model_dc(vton_img.resize((384, 512)))
156
+
157
+
158
+ mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
159
+
160
+
161
+
162
+ mask = mask.resize((768, 1024), Image.NEAREST)
163
+ mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
164
+ # Save the resized masks
165
+ # mask.save("mask_resized.png")
166
+ # mask_gray.save("mask_gray_resized.png")
167
+
168
+ masked_vton_img = Image.composite(mask_gray, vton_img, mask)
169
+ # masked_vton_img.save("masked_vton_img.png")
170
+
171
+ print(f'category is {category}')
172
+
173
+ # images = ootd_model_dc(
174
+ # model_type=model_type,
175
+ # category=category_dict[category],
176
+ # image_garm=garm_img,
177
+ # image_vton=masked_vton_img,
178
+ # mask=mask,
179
+ # image_ori=vton_img,
180
+ # num_samples=3,
181
+ # num_steps=20,
182
+ # image_scale= 2.0,
183
+ # seed=-1,
184
+ # )
185
+
186
+ images = ootd_model_dc(
187
+ model_type=model_type,
188
+ category=category_dict[category],
189
+ image_garm=garm_img,
190
+ image_vton=masked_vton_img,
191
+ mask=mask,
192
+ image_ori=vton_img,
193
+ num_samples=2,
194
+ num_steps=10,
195
+ image_scale=2.0,
196
+ seed=42,
197
+ progress_callback=progress_callback,
198
+ progress_interval=1, # Update progress every step
199
+ )
200
+
201
+
202
+ return images
203
+
204
+
205
+
206
+ # def create_progress_callback(session_id):
207
+ # def progress_callback(step, total_steps):
208
+ # progress = int((step + 1) / total_steps * 100)
209
+ # print(f"Publishing progress {progress} for session {session_id}")
210
+ # sse.publish({"progress": progress}, type='progress', channel=session_id)
211
+ # return progress_callback
212
+
213
+ # @celery.task(bind=True)
214
+ # def process_image(self, session_id, garm_path, vton_path, category):
215
+ # try:
216
+ # print(f"Starting process_image task for session {session_id}")
217
+
218
+ # progress_callback = create_progress_callback(session_id)
219
+
220
+ # output_images = process_dc(garm_img=garm_path,
221
+ # vton_img=vton_path,
222
+ # category=category,
223
+ # progress_callback=progress_callback)
224
+
225
+ # if not output_images:
226
+ # sse.publish({"error": "No output image generated"}, type='error', channel=session_id)
227
+ # return None
228
+
229
+ # output_image = output_images[0]
230
+
231
+ # # Generate a UUID for the output image
232
+ # image_uuid = str(uuid.uuid4())
233
+
234
+ # # Create the output filename with the UUID
235
+ # output_filename = f"{image_uuid}.png"
236
+ # output_path = os.path.join(OUTPUT_FOLDER, output_filename)
237
+
238
+ # # Save the output image
239
+ # output_image.save(output_path, format='PNG')
240
+
241
+ # # Add the UUID and path to the image_results map
242
+ # with image_results_lock:
243
+ # image_results[image_uuid] = output_path
244
+
245
+ # sse.publish({"message": "Processing complete", "uuid": image_uuid}, type='complete', channel=session_id)
246
+
247
+ # return image_uuid
248
+
249
+ # except Exception as e:
250
+ # sse.publish({"error": str(e)}, type='error', channel=session_id)
251
+ # return print(f"panic in process_image: {str(e)}")
252
+
253
+
254
+ @app.route('/')
255
+ def root():
256
+ try:
257
+ response_data = {"message": "This is VTR API v1.0"}
258
+ return jsonify(response_data)
259
+ except Exception as e:
260
+ logger.error(f"Root endpoint error: {str(e)}")
261
+ response_data = {"message": "Internal server Error"}
262
+ return jsonify(response_data), 500
263
+
264
+
265
+ # @app.route('/stream')
266
+ # def stream():
267
+ # session_id = request.args.get('channel')
268
+ # if not session_id:
269
+ # return "No channel specified", 400
270
+ # return Response(sse.stream(), content_type='text/event-stream')
271
+
272
+ @app.route('/test_sse/<session_id>')
273
+ def test_sse(session_id):
274
+ sse.publish({"message": "Test SSE"}, type='test', channel=session_id)
275
+ return "SSE test message sent"
276
+
277
+ #write Flask api name "generate" with POST method that will input 2 images and return 1 image
278
+ @app.route('/generate', methods=['POST'])
279
+ def generate():
280
+ """
281
+ A Flask route that handles a POST request to the '/generate' endpoint.
282
+ It expects two files, 'garm_img' and 'vton_img', to be included in the request.
283
+ The function calls the 'process_dc' function with the provided files and the
284
+ category 'Upper-body'. It then sends the processed image as a file with the
285
+ mimetype 'image/png' and returns it to the client. If any exception occurs,
286
+ the function logs the error and returns a JSON response with a status code of
287
+ 500.
288
+
289
+ Parameters:
290
+ None
291
+
292
+ Returns:
293
+ A Flask response object with the processed image as a file.
294
+
295
+ Raises:
296
+ None
297
+ """
298
+
299
+ # if category == 'Upper-body':
300
+ # category = 0
301
+ # elif category == 'Lower-body':
302
+ # category = 1
303
+ # else:
304
+ # category = 2
305
+
306
+ try:
307
+ cloths_type = ["Upper-body", "Lower-body", "Dress"]
308
+ garm_img = request.files['garm_img']
309
+ vton_img = request.files['vton_img']
310
+ cat = request.form['category']
311
+
312
+ print(f'category is {cat}')
313
+
314
+ category = cloths_type[int(cat)]  # map the numeric index to a category name
315
+
316
+ # Save the uploaded files
317
+ garm_path = os.path.join(UPLOAD_FOLDER, 'garm_input.png')
318
+ vton_path = os.path.join(UPLOAD_FOLDER, 'vton_input.png')
319
+
320
+ garm_img.save(garm_path)
321
+ vton_img.save(vton_path)
322
+
323
+ # Convert file objects to bytes IO objects
324
+ # garm_img = BytesIO(garm_img.read())
325
+ # vton_img = BytesIO(vton_img.read())
326
+
327
+ # Start processing in a background task
328
+
329
+ session_id = str(uuid.uuid4())
330
+
331
+
332
+ process_image.apply_async(args=[session_id, garm_path, vton_path, category])
333
+
334
+ # Immediately return the session_id to the client
335
+ return jsonify({"session_id": session_id, "message": "Processing started"}), 202
336
+
337
+ # while not task.ready():
338
+ # time.sleep(1) # Polling the task status every second
339
+
340
+ # if task.successful():
341
+ # img_byte_arr = task.result
342
+ # if img_byte_arr:
343
+ # return Response(img_byte_arr, mimetype='image/png')
344
+ # else:
345
+ # return Response("No output image generated", status=500)
346
+ # else:
347
+ # return Response("Processing failed", status=500)
348
+
349
+
350
+
351
+ except Exception as e:
352
+ print(f"Error: {str(e)}") # Log the error
353
+ return Response(str(e), status=500)
354
+
355
+
356
+
357
+ @app.route('/get_image/<uuid>')
358
+ def get_image(uuid):
359
+ try:
360
+ with image_results_lock:
361
+ if uuid not in image_results:
362
+ raise NotFound("Invalid UUID or result not available")
363
+
364
+ image_path = image_results[uuid]
365
+
366
+ if not os.path.exists(image_path):
367
+ raise NotFound("Image file not found")
368
+
369
+ # Determine the MIME type based on the file extension
370
+ file_extension = os.path.splitext(image_path)[1].lower()
371
+ mime_type = 'image/jpeg' if file_extension == '.jpg' or file_extension == '.jpeg' else 'image/png'
372
+
373
+ return send_file(image_path, mimetype=mime_type, as_attachment=False)
374
+
375
+ except NotFound as e:
376
+ logger.warning(f"Get image request failed: {str(e)}")
377
+ return jsonify({"error": str(e)}), 404
378
+
379
+ except Exception as e:
380
+ logger.error(f"Unexpected error in get_image: {str(e)}")
381
+ return jsonify({"error": "An unexpected error occurred"}), 500
382
+
383
+
384
+
385
+ if __name__ == '__main__':
386
+ app.run(debug=False, host='0.0.0.0', port=5009)
387
+
388
+
389
+
390
+
391
+ # nohup gunicorn -b 0.0.0.0:5003 sentiment_api:app &
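A rough client sketch for the asynchronous flow in app2.py: POST /generate returns a session_id immediately, progress and completion events are published over SSE on that channel, and the finished image is fetched from /get_image/<uuid> using the uuid carried by the 'complete' event. The SSE-listening part is omitted here and the uuid is a placeholder:

import requests

BASE = "http://localhost:5009"  # port per app.run above

# Start the job; the 'category' form field is the numeric index used by app2.py.
with open("garment.png", "rb") as g, open("person.png", "rb") as p:
    r = requests.post(
        f"{BASE}/generate",
        files={"garm_img": g, "vton_img": p},
        data={"category": "0"},  # 0 = Upper-body, 1 = Lower-body, 2 = Dress
    )
r.raise_for_status()
session_id = r.json()["session_id"]

# ... listen on the SSE channel `session_id` until a 'complete' event arrives
# with the image uuid (omitted for brevity) ...
image_uuid = "<uuid from the complete event>"

img = requests.get(f"{BASE}/get_image/{image_uuid}")
img.raise_for_status()
with open("tryon_result.png", "wb") as f:
    f.write(img.content)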
checkpoints/openpose/ckpts/body_pose_model.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a948c16078b0f08e236bda51a385d855ef4c153598947c28c0d47ed94bb746
3
+ size 209267595
debugging_setps.txt ADDED
@@ -0,0 +1,47 @@
1
+ initial inference time - 30-40 sec 😁
2
+
3
+ 1) lowered num_steps for the diffusion model from 20 to 10 - inference time = 17-19 sec 👍
4
+
5
+ 2) moved the ONNX model from CPU compute to GPU - inference time = 12-14 sec; cold start takes more time 😀
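A minimal sketch of what step 2 amounts to, assuming the human-parsing model is served through onnxruntime; the checkpoint path and session options below are illustrative, not the exact contents of run_parsing.py:

import onnxruntime as ort

# Prefer the CUDA execution provider and fall back to CPU if it is unavailable.
# The first call pays CUDA initialization cost, which explains the slower cold start.
opts = ort.SessionOptions()
opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
session = ort.InferenceSession(
    "checkpoints/humanparsing/parsing_atr.onnx",  # illustrative path
    sess_options=opts,
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
# Illustrative inference call; the actual input name depends on the exported graph.
# outputs = session.run(None, {"input": preprocessed_image})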
6
+
7
+
8
+
9
+
10
+ how it works
11
+
12
+ 1 preprocess images -
13
+ first, the target human image is preprocessed with openpose and humanparse
14
+ openpose - to get the pose information of the joints
15
+ humanparse - to segment the image into different parts like face, body and background, which we can use to
16
+ determine where to apply diffusion using a mask
17
+
18
+ the mask from humanparse is merged onto the original human image, which we then feed into the diffusion model (see the sketch below)
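As a concrete reference, the masking step in app.py above boils down to the following; this sketch assumes openpose_model, parsing_model_dc and a 768x1024 RGB vton_img are already set up as in app.py:

from PIL import Image
from run.utils_ootd import get_mask_location

# Pose keypoints and the human-parsing map are computed on a 384x512 copy.
keypoints = openpose_model(vton_img.resize((384, 512)))
model_parse, _ = parsing_model_dc(vton_img.resize((384, 512)))

# Mask of the try-on region plus a gray-filled version of it.
mask, mask_gray = get_mask_location('dc', 'upper_body', model_parse, keypoints)
mask = mask.resize((768, 1024), Image.NEAREST)
mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)

# Keep the original pixels outside the mask; gray out the area the diffusion model will redraw.
masked_vton_img = Image.composite(mask_gray, vton_img, mask)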
19
+
20
+ processing cloth image -
21
+
22
+ with torch.no_grad():
23
+ prompt_image = self.auto_processor(images=image_garm, return_tensors="pt").to('cuda')
24
+ prompt_image = self.image_encoder(prompt_image.data['pixel_values']).image_embeds
25
+ prompt_image = prompt_image.unsqueeze(1)
26
+ if model_type == 'hd':
27
+ prompt_embeds = self.text_encoder(self.tokenize_captions([""], 2).to('cuda'))[0]
28
+ prompt_embeds[:, 1:] = prompt_image[:]
29
+ elif model_type == 'dc':
30
+ prompt_embeds = self.text_encoder(self.tokenize_captions([category], 3).to('cuda'))[0]
31
+ prompt_embeds = torch.cat([prompt_embeds, prompt_image], dim=1)
32
+
33
+
34
+ this converts the cloth image into an image embedding and generates a prompt embedding from the category we provide
35
+
36
+
37
+
38
+ GatedSelfAttentionDense: This class combines visual features and object features using self-attention.
39
+ It's likely used to fuse information about the clothing items with the human body image.
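A simplified sketch of the gated self-attention idea, not the exact GatedSelfAttentionDense class used in the pipeline: the visual tokens attend jointly to themselves and to the garment/object tokens, and the attended result is added back through a learned tanh gate.

import torch
import torch.nn as nn

class GatedSelfAttentionSketch(nn.Module):
    """Illustrative only: fuse visual tokens x with conditioning tokens objs."""
    def __init__(self, dim: int, n_heads: int):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.attn = nn.MultiheadAttention(dim, n_heads, batch_first=True)
        self.alpha = nn.Parameter(torch.zeros(1))  # gate starts closed

    def forward(self, x: torch.Tensor, objs: torch.Tensor) -> torch.Tensor:
        # x: (B, N, dim) visual tokens, objs: (B, M, dim) object/garment tokens
        h = self.norm(torch.cat([x, objs], dim=1))
        attended, _ = self.attn(h, h, h)
        # Only the visual positions receive the gated residual update.
        return x + self.alpha.tanh() * attended[:, : x.size(1)]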
40
+
41
+ 2 at last we feed both the masked human image and
42
+ the concatenated cloth image embedding and prompt embedding - [image_embeds, prompt_embeds] -
43
+ into the diffusion model and run inference -
44
+ first it converts the image input into a latent embedding using the VAE,
45
+ then performs diffusion with the parameters we provided: samples, num_steps, noise, seed, etc.
46
+ after the given number of diffusion steps we convert the output back into image space using the VAE,
47
+ and that's our output image (the end-to-end call is sketched below)
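Putting it together, the end-to-end call as made in process_dc in app.py, with the reduced step count from this commit; it assumes the models, images and mask prepared as above:

# ootd_model_dc, garm_img, vton_img, mask and masked_vton_img as prepared in app.py.
images = ootd_model_dc(
    model_type='dc',
    category='upperbody',        # one of category_dict
    image_garm=garm_img,         # 768x1024 garment image
    image_vton=masked_vton_img,  # model image with the try-on region grayed out
    mask=mask,
    image_ori=vton_img,          # untouched model image
    num_samples=1,
    num_steps=10,                # lowered from 20 in this commit
    image_scale=1.0,
    seed=-1,                     # random seed
)
result = images[0]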
nohup.out ADDED
@@ -0,0 +1,286 @@
[tqdm progress output omitted: repeated inference runs, each showing a 1-iteration preprocessing bar followed by a 10-step denoising loop at roughly 1.6-2.0 s/it]
1
+
2
+ /home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
3
+ warnings.warn(
4
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
5
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
6
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
7
+
8
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
9
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
10
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
11
+ IMPORTANT: You are using gradio version 4.16.0, however version 4.29.0 is available, please upgrade.
12
+ --------
13
+ Running on local URL: http://0.0.0.0:7860
14
+
15
+ To create a public link, set `share=True` in `launch()`.
16
+ left shoulder - [151.0, 99.0]
17
+ right shoulder - [233.0, 107.0]
18
+ Shoulder width (in pixels): 82.39
19
+ Estimated height (in pixels): 248.68
20
+ Conversion factor (pixels to cm): 1.0418208138973781
21
+ Shoulder width (in cm): 85.83561685700498
22
+ Shoulder width (in INCH): 33.8
23
+
24
  0%| | 0/1 [00:00<?, ?it/s]
25
+
26
  0%| | 0/1 [00:00<?, ?it/s]
27
+ Initial seed: 300557857
28
+ promt is None
29
+
30
  0%| | 0/10 [00:00<?, ?it/s]
31
  10%|β–ˆ | 1/10 [00:02<00:18, 2.01s/it]
32
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.77s/it]
33
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:12, 1.76s/it]
34
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:06<00:10, 1.69s/it]
35
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:08<00:08, 1.67s/it]
36
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:10<00:06, 1.64s/it]
37
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:11<00:04, 1.64s/it]
38
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.63s/it]
39
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:14<00:01, 1.62s/it]
40
+ WARNING: Invalid HTTP request received.
41
+ WARNING: Invalid HTTP request received.
42
+ WARNING: Invalid HTTP request received.
43
+ WARNING: Invalid HTTP request received.
44
+ left shoulder - [151.0, 99.0]
45
+ right shoulder - [233.0, 107.0]
46
+ Shoulder width (in pixels): 82.39
47
+ Estimated height (in pixels): 248.68
48
+ Conversion factor (pixels to cm): 1.0418208138973781
49
+ Shoulder width (in cm): 85.83561685700498
50
+ Shoulder width (in INCH): 33.8
51
+
52
  0%| | 0/1 [00:00<?, ?it/s]
53
+
54
  0%| | 0/1 [00:00<?, ?it/s]
55
+ Initial seed: 1492618498
56
+ promt is None
57
+
58
  0%| | 0/10 [00:00<?, ?it/s]
59
  10%|β–ˆ | 1/10 [00:01<00:17, 1.99s/it]
60
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:13, 1.75s/it]
61
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:11, 1.67s/it]
62
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:06<00:09, 1.65s/it]
63
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:08<00:08, 1.63s/it]
64
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:09<00:06, 1.61s/it]
65
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:11<00:04, 1.61s/it]
66
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.60s/it]
67
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:14<00:01, 1.60s/it]
68
+ left shoulder - [151.0, 99.0]
69
+ right shoulder - [233.0, 107.0]
70
+ Shoulder width (in pixels): 82.39
71
+ Estimated height (in pixels): 248.68
72
+ Conversion factor (pixels to cm): 1.0418208138973781
73
+ Shoulder width (in cm): 85.83561685700498
74
+ Shoulder width (in INCH): 33.8
75
+
76
  0%| | 0/1 [00:00<?, ?it/s]
77
+
78
  0%| | 0/1 [00:00<?, ?it/s]
79
+ Initial seed: 654897631
80
+ promt is None
81
+
82
  0%| | 0/10 [00:00<?, ?it/s]
83
  10%|β–ˆ | 1/10 [00:02<00:18, 2.01s/it]
84
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.77s/it]
85
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:11, 1.70s/it]
86
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:06<00:09, 1.66s/it]
87
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:08<00:08, 1.64s/it]
88
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:10<00:06, 1.63s/it]
89
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:11<00:04, 1.63s/it]
90
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.63s/it]
91
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:14<00:01, 1.62s/it]
92
+ Traceback (most recent call last):
93
+ File "/home/ubuntu/GT_VTR3_1/run/gradio_ootd.py", line 1, in <module>
94
+ import gradio as gr
95
+ ModuleNotFoundError: No module named 'gradio'
96
+ Traceback (most recent call last):
97
+ File "/home/ubuntu/GT_VTR3_1/run/gradio_ootd.py", line 1, in <module>
98
+ import gradio as gr
99
+ ModuleNotFoundError: No module named 'gradio'
100
+ Traceback (most recent call last):
101
+ File "/home/ubuntu/GT_VTR3_1/run/gradio_ootd.py", line 1, in <module>
102
+ import gradio as gr
103
+ ModuleNotFoundError: No module named 'gradio'
104
+ Traceback (most recent call last):
105
+ File "/home/ubuntu/GT_VTR3_1/run/gradio_ootd.py", line 1, in <module>
106
+ import gradio as gr
107
+ ModuleNotFoundError: No module named 'gradio'
108
+
109
+ /home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
110
+ warnings.warn(
111
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
112
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
113
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
114
+
115
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
116
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
117
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
118
+ IMPORTANT: You are using gradio version 4.16.0, however version 4.29.0 is available, please upgrade.
119
+ --------
120
+ Running on local URL: http://0.0.0.0:7860
121
+
122
+ To create a public link, set `share=True` in `launch()`.
123
+ left shoulder - [151.0, 99.0]
124
+ right shoulder - [233.0, 107.0]
125
+ Shoulder width (in pixels): 82.39
126
+ Estimated height (in pixels): 248.68
127
+ Conversion factor (pixels to cm): 1.0418208138973781
128
+ Shoulder width (in cm): 85.83561685700498
129
+ Shoulder width (in INCH): 33.8
130
+
131
  0%| | 0/1 [00:00<?, ?it/s]
132
+
133
  0%| | 0/1 [00:00<?, ?it/s]
134
+ Initial seed: 1082385134
135
+ promt is None
136
+
137
  0%| | 0/10 [00:00<?, ?it/s]
138
  10%|β–ˆ | 1/10 [00:02<00:18, 2.03s/it]
139
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.78s/it]
140
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:13, 1.95s/it]
141
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:07<00:10, 1.81s/it]
142
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:09<00:08, 1.75s/it]
143
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:10<00:06, 1.70s/it]
144
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:12<00:05, 1.68s/it]
145
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.66s/it]
146
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:15<00:01, 1.64s/it]
147
+ left shoulder - [151.0, 99.0]
148
+ right shoulder - [233.0, 107.0]
149
+ Shoulder width (in pixels): 82.39
150
+ Estimated height (in pixels): 248.68
151
+ Conversion factor (pixels to cm): 1.0418208138973781
152
+ Shoulder width (in cm): 85.83561685700498
153
+ Shoulder width (in INCH): 33.8
154
+
155
  0%| | 0/1 [00:00<?, ?it/s]
156
+
157
  0%| | 0/1 [00:00<?, ?it/s]
158
+ Initial seed: 1103655779
159
+ promt is None
160
+
161
  0%| | 0/10 [00:00<?, ?it/s]
162
  10%|β–ˆ | 1/10 [00:01<00:17, 2.00s/it]
163
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.77s/it]
164
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:11, 1.69s/it]
165
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:06<00:09, 1.67s/it]
166
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:08<00:08, 1.65s/it]
167
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:10<00:06, 1.64s/it]
168
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:11<00:04, 1.63s/it]
169
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.63s/it]
170
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:14<00:01, 1.62s/it]
171
+ left shoulder - [148.0, 104.0]
172
+ right shoulder - [232.0, 106.0]
173
+ Shoulder width (in pixels): 84.02
174
+ Estimated height (in pixels): 269.08
175
+ Conversion factor (pixels to cm): 0.9628363312026164
176
+ Shoulder width (in cm): 80.89750854764382
177
+ Shoulder width (in INCH): 31.8
178
+
179
  0%| | 0/1 [00:00<?, ?it/s]
180
+
181
  0%| | 0/1 [00:00<?, ?it/s]
182
+ Initial seed: 911324795
183
+ promt is None
184
+
185
  0%| | 0/10 [00:00<?, ?it/s]
186
  10%|β–ˆ | 1/10 [00:02<00:18, 2.01s/it]
187
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.77s/it]
188
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:11, 1.70s/it]
189
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:06<00:09, 1.66s/it]
190
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:08<00:08, 1.64s/it]
191
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:10<00:06, 1.63s/it]
192
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:11<00:04, 1.62s/it]
193
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.62s/it]
194
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:14<00:01, 1.62s/it]
195
+ left shoulder - [148.0, 104.0]
196
+ right shoulder - [232.0, 106.0]
197
+ Shoulder width (in pixels): 84.02
198
+ Estimated height (in pixels): 269.08
199
+ Conversion factor (pixels to cm): 0.9628363312026164
200
+ Shoulder width (in cm): 80.89750854764382
201
+ Shoulder width (in INCH): 31.8
202
+
203
  0%| | 0/1 [00:00<?, ?it/s]
204
+
205
  0%| | 0/1 [00:00<?, ?it/s]
206
+ Initial seed: 1627175075
207
+ promt is None
208
+
209
  0%| | 0/10 [00:00<?, ?it/s]
210
  10%|β–ˆ | 1/10 [00:02<00:18, 2.02s/it]
211
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.79s/it]
212
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:11, 1.71s/it]
213
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:06<00:10, 1.68s/it]
214
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:08<00:08, 1.65s/it]
215
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:10<00:06, 1.64s/it]
216
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:11<00:04, 1.63s/it]
217
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.63s/it]
218
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:14<00:01, 1.62s/it]
219
+ left shoulder - [151.0, 99.0]
220
+ right shoulder - [233.0, 107.0]
221
+ Shoulder width (in pixels): 82.39
222
+ Estimated height (in pixels): 248.68
223
+ Conversion factor (pixels to cm): 1.0418208138973781
224
+ Shoulder width (in cm): 85.83561685700498
225
+ Shoulder width (in INCH): 33.8
226
+
227
  0%| | 0/1 [00:00<?, ?it/s]
228
+
229
  0%| | 0/1 [00:00<?, ?it/s]
230
+ Initial seed: 1589160508
231
+ promt is None
232
+
233
  0%| | 0/10 [00:00<?, ?it/s]
234
  10%|β–ˆ | 1/10 [00:01<00:17, 1.99s/it]
235
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.76s/it]
236
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:11, 1.69s/it]
237
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:06<00:09, 1.65s/it]
238
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:08<00:08, 1.64s/it]
239
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:10<00:06, 1.63s/it]
240
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:11<00:04, 1.62s/it]
241
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.62s/it]
242
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:14<00:01, 1.62s/it]
243
+ left shoulder - [151.0, 99.0]
244
+ right shoulder - [233.0, 107.0]
245
+ Shoulder width (in pixels): 82.39
246
+ Estimated height (in pixels): 248.68
247
+ Conversion factor (pixels to cm): 1.0418208138973781
248
+ Shoulder width (in cm): 85.83561685700498
249
+ Shoulder width (in INCH): 33.8
250
+
251
  0%| | 0/1 [00:00<?, ?it/s]
252
+
253
  0%| | 0/1 [00:00<?, ?it/s]
254
+ Initial seed: 391988338
255
+ promt is None
256
+
257
  0%| | 0/10 [00:00<?, ?it/s]
258
  10%|β–ˆ | 1/10 [00:02<00:18, 2.01s/it]
259
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.78s/it]
260
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:11, 1.71s/it]
261
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:06<00:09, 1.66s/it]
262
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:08<00:08, 1.64s/it]
263
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:10<00:06, 1.64s/it]
264
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:11<00:04, 1.63s/it]
265
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.63s/it]
266
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:14<00:01, 1.62s/it]
267
+ left shoulder - [151.0, 99.0]
268
+ right shoulder - [233.0, 107.0]
269
+ Shoulder width (in pixels): 82.39
270
+ Estimated height (in pixels): 248.68
271
+ Conversion factor (pixels to cm): 1.0418208138973781
272
+ Shoulder width (in cm): 85.83561685700498
273
+ Shoulder width (in INCH): 33.8
274
+
275
  0%| | 0/1 [00:00<?, ?it/s]
276
+
277
  0%| | 0/1 [00:00<?, ?it/s]
278
+ Initial seed: 722354609
279
+ promt is None
280
+
281
  0%| | 0/10 [00:00<?, ?it/s]
282
  10%|β–ˆ | 1/10 [00:01<00:17, 1.99s/it]
283
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.76s/it]
284
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:11, 1.69s/it]
285
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:06<00:09, 1.66s/it]
286
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:08<00:08, 1.64s/it]
287
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:10<00:06, 1.63s/it]
288
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:11<00:04, 1.62s/it]
289
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.62s/it]
290
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:14<00:01, 1.62s/it]
291
+ left shoulder - [151.0, 99.0]
292
+ right shoulder - [233.0, 107.0]
293
+ Shoulder width (in pixels): 82.39
294
+ Estimated height (in pixels): 248.68
295
+ Conversion factor (pixels to cm): 1.0418208138973781
296
+ Shoulder width (in cm): 85.83561685700498
297
+ Shoulder width (in INCH): 33.8
298
+
299
  0%| | 0/1 [00:00<?, ?it/s]
300
+
301
  0%| | 0/1 [00:00<?, ?it/s]
302
+ Initial seed: 459777321
303
+ promt is None
304
+
305
  0%| | 0/10 [00:00<?, ?it/s]
306
  10%|β–ˆ | 1/10 [00:01<00:17, 2.00s/it]
307
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.76s/it]
308
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:05<00:11, 1.69s/it]
309
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:06<00:09, 1.66s/it]
310
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:08<00:08, 1.65s/it]
311
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:10<00:06, 1.63s/it]
312
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:11<00:04, 1.63s/it]
313
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:13<00:03, 1.63s/it]
314
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:14<00:01, 1.62s/it]
315
+
316
+ /home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
317
+ warnings.warn(
318
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
319
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
320
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
321
+
322
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
323
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
324
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
325
+
326
+ /home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
327
+ warnings.warn(
328
+
329
+ /home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
330
+ warnings.warn(
331
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
332
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
333
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
334
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
335
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
336
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
337
+
338
+
339
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
340
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
341
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
342
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
343
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
344
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
345
+ Traceback (most recent call last):
346
+ File "/home/ubuntu/GT_VTR3_1/run/gradio_ootd.py", line 46, in <module>
347
+ ootd_model_dc.text_encoder.to('cuda')
348
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/transformers/modeling_utils.py", line 2460, in to
349
+ return super().to(*args, **kwargs)
350
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1145, in to
351
+ return self._apply(convert)
352
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 797, in _apply
353
+ module._apply(fn)
354
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 797, in _apply
355
+ module._apply(fn)
356
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 797, in _apply
357
+ module._apply(fn)
358
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 820, in _apply
359
+ param_applied = fn(param)
360
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1143, in convert
361
+ return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
362
+ torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 146.00 MiB (GPU 0; 14.58 GiB total capacity; 4.96 GiB already allocated; 105.62 MiB free; 5.00 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
363
+ Traceback (most recent call last):
364
+ File "/home/ubuntu/GT_VTR3_1/run/gradio_ootd.py", line 46, in <module>
365
+ ootd_model_dc.text_encoder.to('cuda')
366
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/transformers/modeling_utils.py", line 2460, in to
367
+ return super().to(*args, **kwargs)
368
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1145, in to
369
+ return self._apply(convert)
370
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 797, in _apply
371
+ module._apply(fn)
372
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 797, in _apply
373
+ module._apply(fn)
374
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 797, in _apply
375
+ module._apply(fn)
376
+ [Previous line repeated 3 more times]
377
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 820, in _apply
378
+ param_applied = fn(param)
379
+ File "/home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1143, in convert
380
+ return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
381
+ torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 14.58 GiB total capacity; 5.21 GiB already allocated; 5.62 MiB free; 5.28 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
382
+
383
+ /home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
384
+ warnings.warn(
385
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
386
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
387
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
388
+
389
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
390
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
391
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
392
+
393
+ /home/ubuntu/miniconda3/envs/vtr/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
394
+ warnings.warn(
395
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
396
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
397
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
398
+
399
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
400
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
401
+ `text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.
402
+ IMPORTANT: You are using gradio version 4.16.0, however version 4.29.0 is available, please upgrade.
403
+ --------
404
+ Running on local URL: http://0.0.0.0:7860
405
+
406
+ To create a public link, set `share=True` in `launch()`.
407
+ left shoulder - [151.0, 99.0]
408
+ right shoulder - [233.0, 107.0]
409
+ Shoulder width (in pixels): 82.39
410
+ Estimated height (in pixels): 248.68
411
+ Conversion factor (pixels to cm): 1.0418208138973781
412
+ Shoulder width (in cm): 85.83561685700498
413
+ Shoulder width (in INCH): 33.8
414
+
415
  0%| | 0/1 [00:00<?, ?it/s]
416
+
417
  0%| | 0/1 [00:00<?, ?it/s]
418
+ Initial seed: 1770992986
419
+ prompt is None
420
+
421
  0%| | 0/10 [00:00<?, ?it/s]
422
  10%|β–ˆ | 1/10 [00:02<00:18, 2.03s/it]
423
  20%|β–ˆβ–ˆ | 2/10 [00:03<00:14, 1.78s/it]
424
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:06<00:16, 2.39s/it]
425
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:08<00:12, 2.08s/it]
426
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:09<00:09, 1.91s/it]
427
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:11<00:07, 1.81s/it]
428
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:13<00:05, 1.74s/it]
429
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:14<00:03, 1.70s/it]
430
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:16<00:01, 1.67s/it]
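The two CUDA out-of-memory tracebacks earlier in this log come from moving a second pipeline's text encoder onto the same ~14.6 GiB GPU. The error message itself points at the allocator knob; below is a minimal sketch of acting on it (the 128 MiB split size is an illustrative value, not something set anywhere in this commit).

import os
# Must be set before the first CUDA allocation, i.e. before the pipelines are built.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:128")

import torch
if torch.cuda.is_available():
    # Prints allocated/reserved/fragmentation stats, useful to see how much of the
    # 14.58 GiB is actually held by PyTorch before the encoders are moved to 'cuda'.
    print(torch.cuda.memory_summary(abbreviated=True))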
ootd/inference_ootd.py CHANGED
@@ -112,9 +112,12 @@ class OOTDiffusion:
112
  prompt_image = prompt_image.unsqueeze(1)
113
  if model_type == 'hd':
114
  prompt_embeds = self.text_encoder(self.tokenize_captions([""], 2).to('cuda'))[0]
 
 
115
  prompt_embeds[:, 1:] = prompt_image[:]
116
  elif model_type == 'dc':
117
  prompt_embeds = self.text_encoder(self.tokenize_captions([category], 3).to('cuda'))[0]
 
118
  prompt_embeds = torch.cat([prompt_embeds, prompt_image], dim=1)
119
  else:
120
  raise ValueError("model_type must be \'hd\' or \'dc\'!")
 
112
  prompt_image = prompt_image.unsqueeze(1)
113
  if model_type == 'hd':
114
  prompt_embeds = self.text_encoder(self.tokenize_captions([""], 2).to('cuda'))[0]
115
+ print(prompt_embeds.shape)
116
+ # print(prompt_embeds.shape)
117
  prompt_embeds[:, 1:] = prompt_image[:]
118
  elif model_type == 'dc':
119
  prompt_embeds = self.text_encoder(self.tokenize_captions([category], 3).to('cuda'))[0]
120
+ print(prompt_embeds.shape)
121
  prompt_embeds = torch.cat([prompt_embeds, prompt_image], dim=1)
122
  else:
123
  raise ValueError("model_type must be \'hd\' or \'dc\'!")
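The debug prints added above expose how the two model types build their conditioning: for 'hd' the garment image feature overwrites every token after the first of an empty-caption embedding, while for 'dc' it is appended after the category caption. A standalone sketch of the same tensor handling, with the batch size and a 768-dim CLIP hidden size assumed purely for illustration:

import torch

prompt_image = torch.randn(1, 1, 768)  # one garment-image "token" (unsqueezed); hidden size assumed
hd_text = torch.randn(1, 2, 768)       # tokenize_captions([""], 2) -> 2 text tokens for 'hd'
dc_text = torch.randn(1, 3, 768)       # tokenize_captions([category], 3) -> 3 text tokens for 'dc'

hd_embeds = hd_text.clone()
hd_embeds[:, 1:] = prompt_image        # image feature replaces everything after the first token
print(hd_embeds.shape)                 # torch.Size([1, 2, 768])

dc_embeds = torch.cat([dc_text, prompt_image], dim=1)  # image feature appended after the caption
print(dc_embeds.shape)                 # torch.Size([1, 4, 768])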
ootd/inference_ootd_dc.py CHANGED
@@ -38,6 +38,7 @@ class OOTDiffusionDC:
38
  VAE_PATH,
39
  subfolder="vae",
40
  torch_dtype=torch.float16,
 
41
  )
42
 
43
  unet_garm = UNetGarm2DConditionModel.from_pretrained(
 
38
  VAE_PATH,
39
  subfolder="vae",
40
  torch_dtype=torch.float16,
41
+
42
  )
43
 
44
  unet_garm = UNetGarm2DConditionModel.from_pretrained(
ootd/pipelines_ootd/__pycache__/attention_garm.cpython-310.pyc ADDED
Binary file (11.4 kB). View file
 
ootd/pipelines_ootd/__pycache__/attention_vton.cpython-310.pyc ADDED
Binary file (11.5 kB). View file
 
ootd/pipelines_ootd/__pycache__/pipeline_ootd.cpython-310.pyc ADDED
Binary file (27 kB). View file
 
ootd/pipelines_ootd/__pycache__/transformer_garm_2d.cpython-310.pyc ADDED
Binary file (13.6 kB). View file
 
ootd/pipelines_ootd/__pycache__/transformer_vton_2d.cpython-310.pyc ADDED
Binary file (13.7 kB). View file
 
ootd/pipelines_ootd/__pycache__/unet_garm_2d_blocks.cpython-310.pyc ADDED
Binary file (63.5 kB). View file
 
ootd/pipelines_ootd/__pycache__/unet_garm_2d_condition.cpython-310.pyc ADDED
Binary file (37 kB). View file
 
ootd/pipelines_ootd/__pycache__/unet_vton_2d_blocks.cpython-310.pyc ADDED
Binary file (63.6 kB). View file
 
ootd/pipelines_ootd/__pycache__/unet_vton_2d_condition.cpython-310.pyc ADDED
Binary file (37.2 kB). View file
 
ootd/pipelines_ootd/pipeline_ootd.py CHANGED
@@ -275,6 +275,10 @@ class OotdPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMix
275
  device = self._execution_device
276
  # check if scheduler is in sigmas space
277
  scheduler_is_in_sigma_space = hasattr(self.scheduler, "sigmas")
 
 
 
 
278
 
279
  # 2. Encode input prompt
280
  prompt_embeds = self._encode_prompt(
 
275
  device = self._execution_device
276
  # check if scheduler is in sigmas space
277
  scheduler_is_in_sigma_space = hasattr(self.scheduler, "sigmas")
278
+
279
+
280
+ print(f'prompt is {prompt}')  # debug
281
+
282
 
283
  # 2. Encode input prompt
284
  prompt_embeds = self._encode_prompt(
preprocess/humanparsing/datasets/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (160 Bytes). View file
 
preprocess/humanparsing/datasets/__pycache__/simple_extractor_dataset.cpython-310.pyc ADDED
Binary file (2.81 kB). View file
 
preprocess/humanparsing/run_parsing.py CHANGED
@@ -13,15 +13,26 @@ class Parsing:
13
  def __init__(self, gpu_id: int):
14
  # self.gpu_id = gpu_id
15
  # torch.cuda.set_device(gpu_id)
 
 
 
16
  session_options = ort.SessionOptions()
17
  session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
18
  session_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
19
  # session_options.add_session_config_entry('gpu_id', str(gpu_id))
 
 
 
 
 
 
 
 
20
  self.session = ort.InferenceSession(os.path.join(Path(__file__).absolute().parents[2].absolute(), 'checkpoints/humanparsing/parsing_atr.onnx'),
21
- sess_options=session_options, providers=['CPUExecutionProvider'])
22
  self.lip_session = ort.InferenceSession(os.path.join(Path(__file__).absolute().parents[2].absolute(), 'checkpoints/humanparsing/parsing_lip.onnx'),
23
- sess_options=session_options, providers=['CPUExecutionProvider'])
24
-
25
 
26
  def __call__(self, input_image):
27
  # torch.cuda.set_device(self.gpu_id)
 
13
  def __init__(self, gpu_id: int):
14
  # self.gpu_id = gpu_id
15
  # torch.cuda.set_device(gpu_id)
16
+ providers = [("CUDAExecutionProvider", {"device_id": torch.cuda.current_device(),
17
+ "user_compute_stream": str(torch.cuda.current_stream().cuda_stream)})]
18
+
19
  session_options = ort.SessionOptions()
20
  session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
21
  session_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
22
  # session_options.add_session_config_entry('gpu_id', str(gpu_id))
23
+ # self.session = ort.InferenceSession(os.path.join(Path(__file__).absolute().parents[2].absolute(), 'checkpoints/humanparsing/parsing_atr.onnx'),
24
+ # sess_options=session_options, providers=['CPUExecutionProvider'])
25
+ # self.lip_session = ort.InferenceSession(os.path.join(Path(__file__).absolute().parents[2].absolute(), 'checkpoints/humanparsing/parsing_lip.onnx'),
26
+ # sess_options=session_options, providers=['CPUExecutionProvider'])
27
+
28
+
29
+ # new: run both parsing sessions on the CUDA execution provider
30
+ session_options.add_session_config_entry('gpu_id', str(gpu_id))
31
  self.session = ort.InferenceSession(os.path.join(Path(__file__).absolute().parents[2].absolute(), 'checkpoints/humanparsing/parsing_atr.onnx'),
32
+ sess_options=session_options, providers=providers)
33
  self.lip_session = ort.InferenceSession(os.path.join(Path(__file__).absolute().parents[2].absolute(), 'checkpoints/humanparsing/parsing_lip.onnx'),
34
+ sess_options=session_options, providers=providers)
35
+
36
 
37
  def __call__(self, input_image):
38
  # torch.cuda.set_device(self.gpu_id)
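The change above is what moves both human-parsing ONNX sessions from the CPU to the CUDA execution provider, sharing PyTorch's current CUDA stream. A condensed, standalone sketch of the same session setup (the model path assumes the repo's checkpoints layout):

import onnxruntime as ort
import torch

providers = [
    ("CUDAExecutionProvider", {
        "device_id": torch.cuda.current_device(),
        # share PyTorch's stream so ORT kernels are ordered with torch ops
        "user_compute_stream": str(torch.cuda.current_stream().cuda_stream),
    }),
    "CPUExecutionProvider",  # fallback if the CUDA provider cannot be loaded
]

opts = ort.SessionOptions()
opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
opts.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL

session = ort.InferenceSession("checkpoints/humanparsing/parsing_atr.onnx",
                               sess_options=opts, providers=providers)
print(session.get_providers())  # confirms whether CUDA was actually picked up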
preprocess/humanparsing/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (157 Bytes). View file
 
preprocess/humanparsing/utils/__pycache__/transforms.cpython-310.pyc ADDED
Binary file (4.86 kB). View file
 
preprocess/openpose/annotator/__pycache__/util.cpython-310.pyc ADDED
Binary file (3.19 kB). View file
 
preprocess/openpose/annotator/openpose/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (3.16 kB). View file
 
preprocess/openpose/annotator/openpose/__pycache__/body.cpython-310.pyc ADDED
Binary file (7.07 kB). View file
 
preprocess/openpose/annotator/openpose/__pycache__/face.cpython-310.pyc ADDED
Binary file (8.16 kB). View file
 
preprocess/openpose/annotator/openpose/__pycache__/hand.cpython-310.pyc ADDED
Binary file (3.19 kB). View file
 
preprocess/openpose/annotator/openpose/__pycache__/model.cpython-310.pyc ADDED
Binary file (6.2 kB). View file
 
preprocess/openpose/annotator/openpose/__pycache__/util.cpython-310.pyc ADDED
Binary file (7.75 kB). View file
 
preprocess/openpose/model_8.png ADDED
preprocess/openpose/run_openpose.py CHANGED
@@ -16,8 +16,11 @@ import time
16
  import json
17
 
18
  # from pytorch_lightning import seed_everything
19
- from preprocess.openpose.annotator.util import resize_image, HWC3
20
- from preprocess.openpose.annotator.openpose import OpenposeDetector
 
 
 
21
 
22
  import argparse
23
  from PIL import Image
@@ -27,7 +30,7 @@ import pdb
27
  # os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
28
 
29
  class OpenPose:
30
- def __init__(self, gpu_id: int):
31
  # self.gpu_id = gpu_id
32
  # torch.cuda.set_device(gpu_id)
33
  self.preprocessor = OpenposeDetector()
@@ -47,6 +50,8 @@ class OpenPose:
47
  assert (H == 512 and W == 384), 'Incorrect input image shape'
48
  pose, detected_map = self.preprocessor(input_image, hand_and_face=False)
49
 
 
 
50
  candidate = pose['bodies']['candidate']
51
  subset = pose['bodies']['subset'][0][:18]
52
  for i in range(18):
@@ -75,10 +80,15 @@ class OpenPose:
75
  # output_image = cv2.resize(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB), (768, 1024))
76
  # cv2.imwrite('/home/aigc/ProjectVTON/OpenPose/keypoints/out_pose.jpg', output_image)
77
 
78
- return keypoints
 
79
 
80
 
81
  if __name__ == '__main__':
82
 
 
83
  model = OpenPose()
84
- model('./images/model.jpg')
 
 
 
 
16
  import json
17
 
18
  # from pytorch_lightning import seed_everything
19
+ # from preprocess.openpose.annotator.util import resize_image, HWC3
20
+ # from preprocess.openpose.annotator.openpose import OpenposeDetector
21
+
22
+ from annotator.util import resize_image, HWC3
23
+ from annotator.openpose import OpenposeDetector
24
 
25
  import argparse
26
  from PIL import Image
 
30
  # os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
31
 
32
  class OpenPose:
33
+ def __init__(self, gpu_id: int=0):
34
  # self.gpu_id = gpu_id
35
  # torch.cuda.set_device(gpu_id)
36
  self.preprocessor = OpenposeDetector()
 
50
  assert (H == 512 and W == 384), 'Incorrect input image shape'
51
  pose, detected_map = self.preprocessor(input_image, hand_and_face=False)
52
 
53
+ # print(pose)
54
+
55
  candidate = pose['bodies']['candidate']
56
  subset = pose['bodies']['subset'][0][:18]
57
  for i in range(18):
 
80
  # output_image = cv2.resize(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB), (768, 1024))
81
  # cv2.imwrite('/home/aigc/ProjectVTON/OpenPose/keypoints/out_pose.jpg', output_image)
82
 
83
+ # return keypoints
84
+ return keypoints, candidate, subset
85
 
86
 
87
  if __name__ == '__main__':
88
 
89
+ import os
90
  model = OpenPose()
91
+
92
+ print(os.getcwd())
93
+
94
+ pose = model('./model_8.png')
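Because __call__ now returns a (keypoints, candidate, subset) tuple instead of a bare keypoints dict, existing call sites must unpack three values; run/gradio_ootd.py further below already does this. A small sketch of the updated usage, mirroring the path setup in run/gradio_ootd.py and assuming it is saved alongside that file:

import sys
from pathlib import Path
from PIL import Image

# Same project-root insertion as run/gradio_ootd.py, so the package imports resolve.
PROJECT_ROOT = Path(__file__).absolute().parents[1]
sys.path.insert(0, str(PROJECT_ROOT))
from preprocess.openpose.run_openpose import OpenPose

model = OpenPose(0)
img = Image.open("run/examples/model/model_8.png").resize((384, 512))

keypoints, candidate, subset = model(img)  # was: keypoints = model(img)
print(keypoints["pose_keypoints_2d"][2],   # the two shoulder points used for the
      keypoints["pose_keypoints_2d"][5])   # width estimate in gradio_ootd.py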
ps.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 43684
requirements.txt CHANGED
@@ -15,4 +15,5 @@ gradio==4.16.0
15
  config==0.5.1
16
  einops==0.7.0
17
  onnxruntime==1.16.2
18
- basicsr
 
 
15
  config==0.5.1
16
  einops==0.7.0
17
  onnxruntime==1.16.2
18
+ basicsr
19
+ onnxruntime-gpu==1.18.0
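requirements.txt now pins the GPU build of ONNX Runtime alongside the existing CPU pin; since both wheels provide the same `onnxruntime` module, it is worth verifying at runtime which providers actually resolved before the parsing sessions are created. A quick check (not part of the commit):

import onnxruntime as ort

print(ort.__version__)
print(ort.get_available_providers())
# 'CUDAExecutionProvider' must appear here for the run_parsing.py change to take effect;
# otherwise the sessions fall back to CPUExecutionProvider.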
run/cloths_db.py ADDED
@@ -0,0 +1,133 @@
1
+
2
+
3
+ cloths_map= {
4
+ '050105_1.jpeg': "Upper-body",
5
+ '02783_00.jpeg': "Upper-body",
6
+ 'male_tshirt1.jpeg': "Upper-body",
7
+ '051515_1.jpeg': "Lower-body",
8
+ '07764_00.jpeg': "Upper-body",
9
+ '053786_1.jpeg': "Dress",
10
+ '09933_00.jpeg': "Upper-body",
11
+ '052234_1.jpeg': "Dress",
12
+ '049947_1.jpeg': "Upper-body",
13
+ '06123_00.jpeg': "Upper-body",
14
+ '051827_1.jpeg': "Lower-body",
15
+ '049805_1.jpeg': "Upper-body",
16
+ '049920_1.jpeg': "Upper-body",
17
+ '08348_00.jpeg': "Upper-body",
18
+ '050191_1.jpeg':"Upper-body",
19
+ '03244_00.jpeg': "Upper-body",
20
+ '11791_00.jpeg': "Upper-body",
21
+ '051946_1.jpeg': "Lower-body",
22
+ '00470_00.jpeg': "Upper-body",
23
+ '050181_1.jpeg': "Upper-body",
24
+ '03751_00.jpeg': "Upper-body",
25
+ '11028_00.jpeg': "Upper-body",
26
+ '07382_00.jpeg': "Upper-body",
27
+ '051998_1.jpeg': "Dress",
28
+ '051988_1.jpeg': "Lower-body",
29
+ '11351_00.jpeg': "Upper-body",
30
+ '07429_00.jpeg': "Upper-body",
31
+ '02305_00.jpeg': "Upper-body",
32
+ '03032_00.jpeg': "Upper-body",
33
+ '049949_1.jpeg': "Upper-body",
34
+ '049940_1.jpeg': "Upper-body",
35
+ '049965_1.jpeg': "Upper-body",
36
+ '053790_1.jpeg': "Dress",
37
+ '050002_1.jpeg': "Upper-body",
38
+ '10297_00.jpeg': "Upper-body",
39
+ '048769_1.jpeg': "Upper-body",
40
+ '00055_00.jpeg': "Upper-body",
41
+ '048554_1.jpeg': "Upper-body",
42
+ '051517_1.jpeg': "Lower-body",
43
+ '02015_00.jpeg': "Upper-body",
44
+ '053290_1.jpeg': "Dress",
45
+ '053742_1.jpeg': "Dress",
46
+ '04825_00.jpeg': "Upper-body",
47
+ '053319_1.jpeg': "Dress",
48
+ '053744_1.jpeg': "Dress",
49
+ '00151_00.jpeg': "Upper-body",
50
+ '06429_00.jpeg': "Upper-body",
51
+ '01260_00.jpeg': "Upper-body",
52
+ '12562_00.jpeg': "Upper-body",
53
+ '01430_00.jpeg': "Upper-body",
54
+ '051412_1.jpeg': "Lower-body",
55
+ '00126_00.jpeg': "Upper-body",
56
+ '051991_1.jpeg': "Lower-body",
57
+ '06802_00.jpeg': "Upper-body",
58
+ '051473_1.jpeg': "Lower-body"
59
+
60
+ }
61
+
62
+ modeL_db= {
63
+ '051962_0.jpg': "no-dress",
64
+ '052472_0.jpg': "dress",
65
+ '02783_00.jpg': 0,
66
+ '09933_00.jpg': 0,
67
+ '053228_0.jpg': "dress",
68
+ '051482_0.jpg': "no-dress",
69
+ 'model_1.png': 0,
70
+ '051918_0.jpg': "no-dress",
71
+ '05997_00.jpg': 0,
72
+ 'model_2.png': 0,
73
+ '08348_00.jpg': 0,
74
+ '07966_00.jpg': 0,
75
+ '11791_00.jpg': 0,
76
+ 'model_9.png': "dress",
77
+ '03751_00.jpg': 0,
78
+ '11028_00.jpg': 0,
79
+ '14627_00.jpg': 0,
80
+ '11351_00.jpg': 0,
81
+ '053700_0.jpg': "dress",
82
+ '09597_00.jpg': 0,
83
+ '07429_00.jpg': 0,
84
+ '052964_0.jpg': "dress",
85
+ 'model_3.png': 0,
86
+ '052767_0.jpg': "dress",
87
+ '049447_0.jpg': "no-dress",
88
+ 'model_6.png': 0,
89
+ 'model_7.png': 0,
90
+ '02849_00.jpg': 0,
91
+ 'model_5.png': 0,
92
+ 'model_4.png': 0,
93
+ '06429_00.jpg': 0,
94
+ '01260_00.jpg': 0,
95
+ '01008_00.jpg': "no-dress",
96
+ '049713_0.jpg': "no-dress",
97
+ 'model_8.png': "no-dress",
98
+ '053514_0.jpg': "dress",
99
+ '01861_00.jpg': 0,
100
+ '01430_00.jpg': 0,
101
+ '06802_00.jpg': 0,
102
+ '049205_0.jpg': "no-dress"
103
+ }
104
+
105
+ # import os
106
+
107
+ # def list_files_in_folder(folder_path):
108
+ # """
109
+ # List all files in the given folder and create a dictionary with filenames as keys and 0 as values.
110
+
111
+ # Args:
112
+ # folder_path (str): Path to the folder.
113
+
114
+ # Returns:
115
+ # dict: Dictionary with filenames as keys and 0 as values.
116
+ # """
117
+ # files_dict = {}
118
+
119
+ # try:
120
+ # # List all files in the folder
121
+ # for file_name in os.listdir(folder_path):
122
+ # # Check if it's a file ("no-dress"t a directory)
123
+ # if os.path.isfile(os.path.join(folder_path, file_name)):
124
+ # files_dict[file_name] = 0
125
+ # except Exception as e:
126
+ # print(f"An error occurred: {e}")
127
+
128
+ # return files_dict
129
+
130
+ # # Example usage
131
+ # folder_path = 'run/examples/model'
132
+ # files_dict = list_files_in_folder(folder_path)
133
+ # print(files_dict)
run/gradio_ootd copy_backup.py ADDED
@@ -0,0 +1,380 @@
1
+ import gradio as gr
2
+ import os
3
+ from pathlib import Path
4
+ import sys
5
+ import torch
6
+ from PIL import Image, ImageOps
7
+ import numpy as np
8
+ from utils_ootd import get_mask_location
9
+
10
+ PROJECT_ROOT = Path(__file__).absolute().parents[1].absolute()
11
+ sys.path.insert(0, str(PROJECT_ROOT))
12
+
13
+ from preprocess.openpose.run_openpose import OpenPose
14
+ from preprocess.humanparsing.run_parsing import Parsing
15
+ from ootd.inference_ootd_hd import OOTDiffusionHD
16
+ from ootd.inference_ootd_dc import OOTDiffusionDC
17
+ from preprocess.openpose.annotator.openpose.util import draw_bodypose
18
+
19
+ # Set default dtype to float64
20
+ # torch.set_default_dtype(torch.float16)
21
+
22
+
23
+ openpose_model_hd = OpenPose(0)
24
+ parsing_model_hd = Parsing(0)
25
+ ootd_model_hd = OOTDiffusionHD(0)
26
+
27
+ openpose_model_dc = OpenPose(0)
28
+ parsing_model_dc = Parsing(0)
29
+ ootd_model_dc = OOTDiffusionDC(0)
30
+
31
+
32
+ category_dict = ['upperbody', 'lowerbody', 'dress']
33
+ category_dict_utils = ['upper_body', 'lower_body', 'dresses']
34
+
35
+
36
+ example_path = os.path.join(os.path.dirname(__file__), 'examples')
37
+ model_hd = os.path.join(example_path, 'model/model_1.png')
38
+ garment_hd = os.path.join(example_path, 'garment/03244_00.jpg')
39
+ model_dc = os.path.join(example_path, 'model/model_8.png')
40
+ garment_dc = os.path.join(example_path, 'garment/048554_1.jpg')
41
+
42
+ openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
43
+ ootd_model_dc.pipe.to('cuda')
44
+ ootd_model_dc.image_encoder.to('cuda')
45
+ ootd_model_dc.text_encoder.to('cuda')
46
+
47
+ def convert_to_image(image_array):
48
+ if isinstance(image_array, np.ndarray):
49
+ # Normalize the data to the range [0, 255]
50
+ image_array = 255 * (image_array - np.min(image_array)) / (np.max(image_array) - np.min(image_array))
51
+ # Convert to uint8
52
+ image_array = image_array.astype(np.uint8)
53
+ return Image.fromarray(image_array)
54
+ else:
55
+ # Convert to NumPy array first if necessary
56
+ image_array = np.array(image_array)
57
+ # Normalize and convert to uint8
58
+ image_array = 255 * (image_array - np.min(image_array)) / (np.max(image_array) - np.min(image_array))
59
+ image_array = image_array.astype(np.uint8)
60
+ return Image.fromarray(image_array)
61
+
62
+ # import spaces
63
+
64
+ # @spaces.GPU
65
+ def process_hd(vton_img, garm_img, n_samples, n_steps, image_scale, seed):
66
+ model_type = 'hd'
67
+ category = 0 # 0:upperbody; 1:lowerbody; 2:dress
68
+
69
+ with torch.no_grad():
70
+ openpose_model_hd.preprocessor.body_estimation.model.to('cuda')
71
+ ootd_model_hd.pipe.to('cuda')
72
+ ootd_model_hd.image_encoder.to('cuda')
73
+ ootd_model_hd.text_encoder.to('cuda')
74
+
75
+ garm_img = Image.open(garm_img).resize((768, 1024))
76
+ vton_img = Image.open(vton_img).resize((768, 1024))
77
+ keypoints = openpose_model_hd(vton_img.resize((384, 512)))
78
+ model_parse, _ = parsing_model_hd(vton_img.resize((384, 512)))
79
+
80
+ mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
81
+ mask = mask.resize((768, 1024), Image.NEAREST)
82
+ mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
83
+
84
+ masked_vton_img = Image.composite(mask_gray, vton_img, mask)
85
+
86
+ images = ootd_model_hd(
87
+ model_type=model_type,
88
+ category=category_dict[category],
89
+ image_garm=garm_img,
90
+ image_vton=masked_vton_img,
91
+ mask=mask,
92
+ image_ori=vton_img,
93
+ num_samples=n_samples,
94
+ num_steps=n_steps,
95
+ image_scale=image_scale,
96
+ seed=seed,
97
+ )
98
+
99
+ return images
100
+
101
+
102
+
103
+ # @spaces.GPU
104
+ def process_dc(vton_img, garm_img, category):
105
+ model_type = 'dc'
106
+ if category == 'Upper-body':
107
+ category = 0
108
+ elif category == 'Lower-body':
109
+ category = 1
110
+ else:
111
+ category =2
112
+
113
+ with torch.no_grad():
114
+ # openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
115
+ # ootd_model_dc.pipe.to('cuda')
116
+ # ootd_model_dc.image_encoder.to('cuda')
117
+ # ootd_model_dc.text_encoder.to('cuda')
118
+
119
+ garm_img = Image.open(garm_img).resize((768, 1024))
120
+ vton_img = Image.open(vton_img).resize((768, 1024))
121
+ keypoints ,candidate , subset = openpose_model_dc(vton_img.resize((384, 512)))
122
+
123
+ # print(len(keypoints["pose_keypoints_2d"]))
124
+ # print(keypoints["pose_keypoints_2d"])
125
+
126
+ # person_image = np.asarray(vton_img)
127
+
128
+
129
+ # print(len(person_image))
130
+
131
+
132
+ # person_image = np.asarray(Image.open(vton_img).resize((768, 1024)))
133
+
134
+ # output = draw_bodypose(canvas=person_image,candidate=candidate, subset=subset )
135
+ # output_image = Image.fromarray(output)
136
+ # output_image.save('keypose.png')
137
+
138
+
139
+
140
+ left_point = keypoints["pose_keypoints_2d"][2]
141
+ right_point = keypoints["pose_keypoints_2d"][5]
142
+
143
+ neck_point = keypoints["pose_keypoints_2d"][1]
144
+ hip_point = keypoints["pose_keypoints_2d"][8]
145
+
146
+
147
+
148
+ print(f'left shoulder - {left_point}')
149
+ print(f'right shoulder - {right_point}')
150
+
151
+ # find the shoulder distance using Euclidean distance
152
+ shoulder_width_pixels = round(np.sqrt( np.power((right_point[0]-left_point[0]),2) + np.power((right_point[1]-left_point[1]),2)),2)
153
+
154
+ height_pixels = round(np.sqrt( np.power((neck_point[0]-hip_point[0]),2) + np.power((neck_point[1]-hip_point[1]),2)),2) *2
155
+
156
+
157
+ # # Assuming an average human height
158
+ average_height_cm = 172.72 *1.5
159
+
160
+ # Conversion factor from pixels to cm
161
+ conversion_factor = average_height_cm / height_pixels
162
+
163
+ # Convert shoulder width to real-world units
164
+ shoulder_width_cm = shoulder_width_pixels * conversion_factor
165
+
166
+ print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
167
+ print(f'Estimated height (in pixels): {height_pixels}')
168
+ print(f'Conversion factor (pixels to cm): {conversion_factor}')
169
+ print(f'Shoulder width (in cm): {shoulder_width_cm}')
170
+ print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
171
+
172
+ model_parse, face_mask = parsing_model_dc(vton_img.resize((384, 512)))
173
+
174
+ model_parse_image = convert_to_image(model_parse)
175
+ face_mask_image = convert_to_image(face_mask)
176
+
177
+ # Save the images
178
+ model_parse_image.save('model_parse_image.png')
179
+ face_mask_image.save('face_mask_image.png')
180
+
181
+
182
+ mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
183
+ mask = mask.resize((768, 1024), Image.NEAREST)
184
+ mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
185
+
186
+ masked_vton_img = Image.composite(mask_gray, vton_img, mask)
187
+
188
+ images = ootd_model_dc(
189
+ model_type=model_type,
190
+ category=category_dict[category],
191
+ image_garm=garm_img,
192
+ image_vton=masked_vton_img,
193
+ mask=mask,
194
+ image_ori=vton_img,
195
+ num_samples=1,
196
+ num_steps=10,
197
+ image_scale= 2.0,
198
+ seed=-1,
199
+ )
200
+
201
+ return images
202
+
203
+
204
+ block = gr.Blocks().queue()
205
+ with block:
206
+ with gr.Row():
207
+ gr.Markdown("# ")
208
+ # with gr.Row():
209
+ # gr.Markdown("## Half-body-1")
210
+ # with gr.Row():
211
+ # gr.Markdown("***Support upper-body garments***")
212
+ # with gr.Row():
213
+ # with gr.Column():
214
+ # vton_img = gr.Image(label="Model", sources='upload', type="filepath", height=384, value=model_hd)
215
+ # example = gr.Examples(
216
+ # inputs=vton_img,
217
+ # examples_per_page=14,
218
+ # examples=[
219
+ # os.path.join(example_path, 'model/model_1.png'),
220
+ # os.path.join(example_path, 'model/model_2.png'),
221
+ # os.path.join(example_path, 'model/model_3.png'),
222
+ # os.path.join(example_path, 'model/model_4.png'),
223
+ # os.path.join(example_path, 'model/model_5.png'),
224
+ # os.path.join(example_path, 'model/model_6.png'),
225
+ # os.path.join(example_path, 'model/model_7.png'),
226
+ # os.path.join(example_path, 'model/01008_00.jpg'),
227
+ # os.path.join(example_path, 'model/07966_00.jpg'),
228
+ # os.path.join(example_path, 'model/05997_00.jpg'),
229
+ # os.path.join(example_path, 'model/02849_00.jpg'),
230
+ # os.path.join(example_path, 'model/14627_00.jpg'),
231
+ # os.path.join(example_path, 'model/09597_00.jpg'),
232
+ # os.path.join(example_path, 'model/01861_00.jpg'),
233
+ # ])
234
+ # with gr.Column():
235
+ # garm_img = gr.Image(label="Garment", sources='upload', type="filepath", height=384, value=garment_hd)
236
+ # example = gr.Examples(
237
+ # inputs=garm_img,
238
+ # examples_per_page=14,
239
+ # examples=[
240
+ # os.path.join(example_path, 'garment/03244_00.jpg'),
241
+ # os.path.join(example_path, 'garment/00126_00.jpg'),
242
+ # os.path.join(example_path, 'garment/03032_00.jpg'),
243
+ # os.path.join(example_path, 'garment/06123_00.jpg'),
244
+ # os.path.join(example_path, 'garment/02305_00.jpg'),
245
+ # os.path.join(example_path, 'garment/00055_00.jpg'),
246
+ # os.path.join(example_path, 'garment/00470_00.jpg'),
247
+ # os.path.join(example_path, 'garment/02015_00.jpg'),
248
+ # os.path.join(example_path, 'garment/10297_00.jpg'),
249
+ # os.path.join(example_path, 'garment/07382_00.jpg'),
250
+ # os.path.join(example_path, 'garment/07764_00.jpg'),
251
+ # os.path.join(example_path, 'garment/00151_00.jpg'),
252
+ # os.path.join(example_path, 'garment/12562_00.jpg'),
253
+ # os.path.join(example_path, 'garment/04825_00.jpg'),
254
+ # ])
255
+ # with gr.Column():
256
+ # result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery", preview=True, scale=1)
257
+ # with gr.Column():
258
+ # run_button = gr.Button(value="Run")
259
+ # n_samples = gr.Slider(label="Images", minimum=1, maximum=4, value=1, step=1)
260
+ # n_steps = gr.Slider(label="Steps", minimum=20, maximum=40, value=20, step=1)
261
+ # # scale = gr.Slider(label="Scale", minimum=1.0, maximum=12.0, value=5.0, step=0.1)
262
+ # image_scale = gr.Slider(label="Guidance scale", minimum=1.0, maximum=5.0, value=2.0, step=0.1)
263
+ # seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, value=-1)
264
+
265
+ # ips = [vton_img, garm_img, n_samples, n_steps, image_scale, seed]
266
+ # run_button.click(fn=process_hd, inputs=ips, outputs=[result_gallery])
267
+
268
+
269
+ with gr.Row():
270
+ gr.Markdown("## Virtual Trial Room")
271
+ with gr.Row():
272
+ gr.Markdown("*** Note :- Please Select Garment Type in below drop-down as upper-body/lower-body/dresses;***")
273
+ with gr.Row():
274
+ with gr.Column():
275
+ vton_img_dc = gr.Image(label="Model", sources='upload', type="filepath", height=384, value=model_dc)
276
+ example = gr.Examples(
277
+ label="Select for Upper/Lower Body",
278
+ inputs=vton_img_dc,
279
+ examples_per_page=7,
280
+ examples=[
281
+ os.path.join(example_path, 'model/model_8.png'),
282
+ os.path.join(example_path, 'model/049447_0.jpg'),
283
+ os.path.join(example_path, 'model/049713_0.jpg'),
284
+ os.path.join(example_path, 'model/051482_0.jpg'),
285
+ os.path.join(example_path, 'model/051918_0.jpg'),
286
+ os.path.join(example_path, 'model/051962_0.jpg'),
287
+ os.path.join(example_path, 'model/049205_0.jpg'),
288
+ ]
289
+ )
290
+ example = gr.Examples(
291
+ label="Select for Full Body Dress",
292
+ inputs=vton_img_dc,
293
+ examples_per_page=7,
294
+ examples=[
295
+ os.path.join(example_path, 'model/model_9.png'),
296
+ # os.path.join(example_path, 'model/052767_0.jpg'),
297
+ # os.path.join(example_path, 'model/052472_0.jpg'),
298
+ os.path.join(example_path, 'model/053514_0.jpg'),
299
+ os.path.join(example_path, 'model/male/male_side.png'),
300
+ os.path.join(example_path, 'model/male/male_2.png'),
301
+
302
+ os.path.join(example_path, 'model/male/femal_s_34.png'),
303
+ os.path.join(example_path, 'model/male/femal_s_34_test.png'),
304
+ os.path.join(example_path, 'model/male/male_small.png'),
305
+ os.path.join(example_path, 'model/male/female.png'),
306
+ # os.path.join(example_path, 'model/053228_0.jpg'),
307
+ # os.path.join(example_path, 'model/052964_0.jpg'),
308
+ # os.path.join(example_path, 'model/053700_0.jpg'),
309
+ ]
310
+ )
311
+ with gr.Column():
312
+ garm_img_dc = gr.Image(label="Garment", sources='upload', type="filepath", height=384, value=garment_dc)
313
+ category_dc = gr.Dropdown(label="Garment category (important option!!!)", choices=["Upper-body", "Lower-body", "Dress"], value="Upper-body")
314
+ example = gr.Examples(
315
+ label="Examples (upper-body)",
316
+ inputs=garm_img_dc,
317
+ examples_per_page=7,
318
+ examples=[
319
+ os.path.join(example_path,'garment/01260_00.jpg'),
320
+ os.path.join(example_path,'garment/01430_00.jpg'),
321
+ os.path.join(example_path,'garment/02783_00.jpg'),
322
+ os.path.join(example_path,'garment/03751_00.jpg'),
323
+ os.path.join(example_path,'garment/06429_00.jpg'),
324
+ os.path.join(example_path,'garment/06802_00.jpg'),
325
+ os.path.join(example_path,'garment/07429_00.jpg'),
326
+ os.path.join(example_path,'garment/08348_00.jpg'),
327
+ os.path.join(example_path,'garment/09933_00.jpg'),
328
+ os.path.join(example_path,'garment/11028_00.jpg'),
329
+ os.path.join(example_path,'garment/11351_00.jpg'),
330
+ os.path.join(example_path,'garment/11791_00.jpg'),
331
+ os.path.join(example_path, 'garment/048554_1.jpg'),
332
+ os.path.join(example_path, 'garment/049920_1.jpg'),
333
+ os.path.join(example_path, 'garment/049965_1.jpg'),
334
+ os.path.join(example_path, 'garment/049949_1.jpg'),
335
+ os.path.join(example_path, 'garment/050181_1.jpg'),
336
+ os.path.join(example_path, 'garment/049805_1.jpg'),
337
+ os.path.join(example_path, 'garment/050105_1.jpg'),
338
+ os.path.join(example_path, 'garment/male_tshirt1.png'),
339
+ ])
340
+ example = gr.Examples(
341
+ label="Examples (lower-body)",
342
+ inputs=garm_img_dc,
343
+ examples_per_page=7,
344
+ examples=[
345
+ os.path.join(example_path, 'garment/051827_1.jpg'),
346
+ os.path.join(example_path, 'garment/051946_1.jpg'),
347
+ os.path.join(example_path, 'garment/051473_1.jpg'),
348
+ os.path.join(example_path, 'garment/051515_1.jpg'),
349
+ os.path.join(example_path, 'garment/051517_1.jpg'),
350
+ os.path.join(example_path, 'garment/051988_1.jpg'),
351
+ os.path.join(example_path, 'garment/051412_1.jpg'),
352
+ ])
353
+ example = gr.Examples(
354
+ label="Examples (dress)",
355
+ inputs=garm_img_dc,
356
+ examples_per_page=7,
357
+ examples=[
358
+ os.path.join(example_path, 'garment/053290_1.jpg'),
359
+ os.path.join(example_path, 'garment/053744_1.jpg'),
360
+ os.path.join(example_path, 'garment/053742_1.jpg'),
361
+ os.path.join(example_path, 'garment/053786_1.jpg'),
362
+ os.path.join(example_path, 'garment/053790_1.jpg'),
363
+ os.path.join(example_path, 'garment/053319_1.jpg'),
364
+ os.path.join(example_path, 'garment/052234_1.jpg'),
365
+ ])
366
+ with gr.Column():
367
+ result_gallery_dc = gr.Gallery(label='Output', show_label=False, elem_id="gallery", preview=True, scale=1)
368
+ with gr.Column():
369
+ run_button_dc = gr.Button(value="Run")
370
+ # n_samples_dc = gr.Slider(label="Images", minimum=1, maximum=4, value=1, step=1)
371
+ # n_steps_dc = gr.Slider(label="Steps", minimum=20, maximum=40, value=20, step=1)
372
+ # scale_dc = gr.Slider(label="Scale", minimum=1.0, maximum=12.0, value=5.0, step=0.1)
373
+ # image_scale_dc = gr.Slider(label="Guidance scale", minimum=1.0, maximum=5.0, value=2.0, step=0.1)
374
+ # seed_dc = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, value=-1)
375
+
376
+ ips_dc = [vton_img_dc, garm_img_dc, category_dc]
377
+ run_button_dc.click(fn=process_dc, inputs=ips_dc, outputs=[result_gallery_dc])
378
+
379
+
380
+ block.launch(server_name="0.0.0.0", server_port=7860 )
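The shoulder-width estimate in process_dc above is plain keypoint arithmetic, and the numbers printed in nohup.out earlier in this commit can be reproduced from it. A worked sketch using the logged shoulder points (the neck/hip points themselves are not in the log, so the logged 248.68 px height is reused directly):

import numpy as np

left_point = [151.0, 99.0]    # keypoint 2, as logged
right_point = [233.0, 107.0]  # keypoint 5, as logged

shoulder_width_pixels = round(np.hypot(right_point[0] - left_point[0],
                                       right_point[1] - left_point[1]), 2)  # 82.39
height_pixels = 248.68                                                      # logged value
conversion_factor = (172.72 * 1.5) / height_pixels                          # ~1.0418 cm per pixel
shoulder_width_cm = shoulder_width_pixels * conversion_factor               # ~85.84 cm

print(shoulder_width_pixels)
print(round(shoulder_width_cm, 2), round(shoulder_width_cm / 2.54, 1))      # 85.84 33.8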
run/gradio_ootd.py CHANGED
@@ -4,9 +4,9 @@ from pathlib import Path
4
  import sys
5
  import torch
6
  from PIL import Image, ImageOps
7
-
8
  from utils_ootd import get_mask_location
9
-
10
  PROJECT_ROOT = Path(__file__).absolute().parents[1].absolute()
11
  sys.path.insert(0, str(PROJECT_ROOT))
12
 
@@ -14,15 +14,19 @@ from preprocess.openpose.run_openpose import OpenPose
14
  from preprocess.humanparsing.run_parsing import Parsing
15
  from ootd.inference_ootd_hd import OOTDiffusionHD
16
  from ootd.inference_ootd_dc import OOTDiffusionDC
 
 
 
 
17
 
18
 
19
  openpose_model_hd = OpenPose(0)
20
  parsing_model_hd = Parsing(0)
21
  ootd_model_hd = OOTDiffusionHD(0)
22
 
23
- openpose_model_dc = OpenPose(1)
24
- parsing_model_dc = Parsing(1)
25
- ootd_model_dc = OOTDiffusionDC(1)
26
 
27
 
28
  category_dict = ['upperbody', 'lowerbody', 'dress']
@@ -30,15 +34,36 @@ category_dict_utils = ['upper_body', 'lower_body', 'dresses']
30
 
31
 
32
  example_path = os.path.join(os.path.dirname(__file__), 'examples')
 
33
  model_hd = os.path.join(example_path, 'model/model_1.png')
34
  garment_hd = os.path.join(example_path, 'garment/03244_00.jpg')
 
35
  model_dc = os.path.join(example_path, 'model/model_8.png')
36
  garment_dc = os.path.join(example_path, 'garment/048554_1.jpg')
37
 
 
 
 
 
38
 
39
- import spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- @spaces.GPU
42
  def process_hd(vton_img, garm_img, n_samples, n_steps, image_scale, seed):
43
  model_type = 'hd'
44
  category = 0 # 0:upperbody; 1:lowerbody; 2:dress
@@ -75,7 +100,9 @@ def process_hd(vton_img, garm_img, n_samples, n_steps, image_scale, seed):
75
 
76
  return images
77
 
78
- @spaces.GPU
 
 
79
  def process_dc(vton_img, garm_img, category):
80
  model_type = 'dc'
81
  if category == 'Upper-body':
@@ -86,21 +113,90 @@ def process_dc(vton_img, garm_img, category):
86
  category =2
87
 
88
  with torch.no_grad():
89
- openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
90
- ootd_model_dc.pipe.to('cuda')
91
- ootd_model_dc.image_encoder.to('cuda')
92
- ootd_model_dc.text_encoder.to('cuda')
93
 
94
  garm_img = Image.open(garm_img).resize((768, 1024))
95
  vton_img = Image.open(vton_img).resize((768, 1024))
96
- keypoints = openpose_model_dc(vton_img.resize((384, 512)))
97
- model_parse, _ = parsing_model_dc(vton_img.resize((384, 512)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
 
 
 
 
 
 
 
100
  mask = mask.resize((768, 1024), Image.NEAREST)
101
  mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
 
 
 
102
 
103
  masked_vton_img = Image.composite(mask_gray, vton_img, mask)
 
104
 
105
  images = ootd_model_dc(
106
  model_type=model_type,
@@ -110,167 +206,147 @@ def process_dc(vton_img, garm_img, category):
110
  mask=mask,
111
  image_ori=vton_img,
112
  num_samples=1,
113
- num_steps=20,
114
- image_scale=2.0,
115
  seed=-1,
116
  )
117
 
118
  return images
119
 
 
 
120
 
121
  block = gr.Blocks().queue()
122
  with block:
123
  with gr.Row():
124
  gr.Markdown("# ")
125
- # with gr.Row():
126
- # gr.Markdown("## Half-body-1")
127
- # with gr.Row():
128
- # gr.Markdown("***Support upper-body garments***")
129
- # with gr.Row():
130
- # with gr.Column():
131
- # vton_img = gr.Image(label="Model", sources='upload', type="filepath", height=384, value=model_hd)
132
- # example = gr.Examples(
133
- # inputs=vton_img,
134
- # examples_per_page=14,
135
- # examples=[
136
- # os.path.join(example_path, 'model/model_1.png'),
137
- # os.path.join(example_path, 'model/model_2.png'),
138
- # os.path.join(example_path, 'model/model_3.png'),
139
- # os.path.join(example_path, 'model/model_4.png'),
140
- # os.path.join(example_path, 'model/model_5.png'),
141
- # os.path.join(example_path, 'model/model_6.png'),
142
- # os.path.join(example_path, 'model/model_7.png'),
143
- # os.path.join(example_path, 'model/01008_00.jpg'),
144
- # os.path.join(example_path, 'model/07966_00.jpg'),
145
- # os.path.join(example_path, 'model/05997_00.jpg'),
146
- # os.path.join(example_path, 'model/02849_00.jpg'),
147
- # os.path.join(example_path, 'model/14627_00.jpg'),
148
- # os.path.join(example_path, 'model/09597_00.jpg'),
149
- # os.path.join(example_path, 'model/01861_00.jpg'),
150
- # ])
151
- # with gr.Column():
152
- # garm_img = gr.Image(label="Garment", sources='upload', type="filepath", height=384, value=garment_hd)
153
- # example = gr.Examples(
154
- # inputs=garm_img,
155
- # examples_per_page=14,
156
- # examples=[
157
- # os.path.join(example_path, 'garment/03244_00.jpg'),
158
- # os.path.join(example_path, 'garment/00126_00.jpg'),
159
- # os.path.join(example_path, 'garment/03032_00.jpg'),
160
- # os.path.join(example_path, 'garment/06123_00.jpg'),
161
- # os.path.join(example_path, 'garment/02305_00.jpg'),
162
- # os.path.join(example_path, 'garment/00055_00.jpg'),
163
- # os.path.join(example_path, 'garment/00470_00.jpg'),
164
- # os.path.join(example_path, 'garment/02015_00.jpg'),
165
- # os.path.join(example_path, 'garment/10297_00.jpg'),
166
- # os.path.join(example_path, 'garment/07382_00.jpg'),
167
- # os.path.join(example_path, 'garment/07764_00.jpg'),
168
- # os.path.join(example_path, 'garment/00151_00.jpg'),
169
- # os.path.join(example_path, 'garment/12562_00.jpg'),
170
- # os.path.join(example_path, 'garment/04825_00.jpg'),
171
- # ])
172
- # with gr.Column():
173
- # result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery", preview=True, scale=1)
174
- # with gr.Column():
175
- # run_button = gr.Button(value="Run")
176
- # n_samples = gr.Slider(label="Images", minimum=1, maximum=4, value=1, step=1)
177
- # n_steps = gr.Slider(label="Steps", minimum=20, maximum=40, value=20, step=1)
178
- # # scale = gr.Slider(label="Scale", minimum=1.0, maximum=12.0, value=5.0, step=0.1)
179
- # image_scale = gr.Slider(label="Guidance scale", minimum=1.0, maximum=5.0, value=2.0, step=0.1)
180
- # seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, value=-1)
181
-
182
- # ips = [vton_img, garm_img, n_samples, n_steps, image_scale, seed]
183
- # run_button.click(fn=process_hd, inputs=ips, outputs=[result_gallery])
184
-
185
 
186
  with gr.Row():
187
  gr.Markdown("## Virtual Trial Room")
188
- with gr.Row():
189
- gr.Markdown("*** Note :- Please Select Garment Type in below drop-down as upper-body/lower-body/dresses;***")
190
  with gr.Row():
191
  with gr.Column():
192
  vton_img_dc = gr.Image(label="Model", sources='upload', type="filepath", height=384, value=model_dc)
193
- example = gr.Examples(
194
- label="Select for Upper/Lower Body",
195
- inputs=vton_img_dc,
196
- examples_per_page=7,
197
- examples=[
198
- os.path.join(example_path, 'model/model_8.png'),
199
- # os.path.join(example_path, 'model/049447_0.jpg'),
200
- os.path.join(example_path, 'model/049713_0.jpg'),
201
- # os.path.join(example_path, 'model/051482_0.jpg'),
202
- # os.path.join(example_path, 'model/051918_0.jpg'),
203
- # os.path.join(example_path, 'model/051962_0.jpg'),
204
- # os.path.join(example_path, 'model/049205_0.jpg'),
205
- ]
206
- )
207
- example = gr.Examples(
208
- label="Select for Full Body Dress",
209
- inputs=vton_img_dc,
210
- examples_per_page=7,
211
- examples=[
212
- os.path.join(example_path, 'model/model_9.png'),
213
- # os.path.join(example_path, 'model/052767_0.jpg'),
214
- # os.path.join(example_path, 'model/052472_0.jpg'),
215
- os.path.join(example_path, 'model/053514_0.jpg'),
216
- # os.path.join(example_path, 'model/053228_0.jpg'),
217
- # os.path.join(example_path, 'model/052964_0.jpg'),
218
- # os.path.join(example_path, 'model/053700_0.jpg'),
219
- ]
220
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  with gr.Column():
222
  garm_img_dc = gr.Image(label="Garment", sources='upload', type="filepath", height=384, value=garment_dc)
223
  category_dc = gr.Dropdown(label="Garment category (important option!!!)", choices=["Upper-body", "Lower-body", "Dress"], value="Upper-body")
 
 
 
 
 
 
 
 
224
  example = gr.Examples(
225
  label="Examples (upper-body)",
226
  inputs=garm_img_dc,
 
 
 
 
227
  examples_per_page=7,
228
  examples=[
229
- os.path.join(example_path,'garment/01260_00.jpg'),
230
- os.path.join(example_path,'garment/01430_00.jpg'),
231
- os.path.join(example_path,'garment/02783_00.jpg'),
232
- os.path.join(example_path,'garment/03751_00.jpg'),
233
- os.path.join(example_path,'garment/06429_00.jpg'),
234
- os.path.join(example_path,'garment/06802_00.jpg'),
235
- os.path.join(example_path,'garment/07429_00.jpg'),
236
- os.path.join(example_path,'garment/08348_00.jpg'),
237
- os.path.join(example_path,'garment/09933_00.jpg'),
238
- os.path.join(example_path,'garment/11028_00.jpg'),
239
- os.path.join(example_path,'garment/11351_00.jpg'),
240
- os.path.join(example_path,'garment/11791_00.jpg'),
241
- os.path.join(example_path, 'garment/048554_1.jpg'),
242
- os.path.join(example_path, 'garment/049920_1.jpg'),
243
- os.path.join(example_path, 'garment/049965_1.jpg'),
244
- os.path.join(example_path, 'garment/049949_1.jpg'),
245
- os.path.join(example_path, 'garment/050181_1.jpg'),
246
- os.path.join(example_path, 'garment/049805_1.jpg'),
247
- os.path.join(example_path, 'garment/050105_1.jpg'),
 
248
  ])
249
  example = gr.Examples(
250
  label="Examples (lower-body)",
251
  inputs=garm_img_dc,
252
  examples_per_page=7,
253
  examples=[
254
- os.path.join(example_path, 'garment/051827_1.jpg'),
255
- os.path.join(example_path, 'garment/051946_1.jpg'),
256
- os.path.join(example_path, 'garment/051473_1.jpg'),
257
- os.path.join(example_path, 'garment/051515_1.jpg'),
258
- os.path.join(example_path, 'garment/051517_1.jpg'),
259
- os.path.join(example_path, 'garment/051988_1.jpg'),
260
- os.path.join(example_path, 'garment/051412_1.jpg'),
261
  ])
 
 
 
262
  example = gr.Examples(
263
  label="Examples (dress)",
264
  inputs=garm_img_dc,
265
  examples_per_page=7,
266
  examples=[
267
- os.path.join(example_path, 'garment/053290_1.jpg'),
268
- os.path.join(example_path, 'garment/053744_1.jpg'),
269
- os.path.join(example_path, 'garment/053742_1.jpg'),
270
- os.path.join(example_path, 'garment/053786_1.jpg'),
271
- os.path.join(example_path, 'garment/053790_1.jpg'),
272
- os.path.join(example_path, 'garment/053319_1.jpg'),
273
- os.path.join(example_path, 'garment/052234_1.jpg'),
274
  ])
275
  with gr.Column():
276
  result_gallery_dc = gr.Gallery(label='Output', show_label=False, elem_id="gallery", preview=True, scale=1)
@@ -281,9 +357,12 @@ with block:
281
  # scale_dc = gr.Slider(label="Scale", minimum=1.0, maximum=12.0, value=5.0, step=0.1)
282
  # image_scale_dc = gr.Slider(label="Guidance scale", minimum=1.0, maximum=5.0, value=2.0, step=0.1)
283
  # seed_dc = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, value=-1)
284
-
285
- ips_dc = [vton_img_dc, garm_img_dc, category_dc]
 
 
 
286
  run_button_dc.click(fn=process_dc, inputs=ips_dc, outputs=[result_gallery_dc])
287
 
288
 
289
- block.launch()
 
4
  import sys
5
  import torch
6
  from PIL import Image, ImageOps
7
+ import numpy as np
8
  from utils_ootd import get_mask_location
9
+ from cloths_db import cloths_map, modeL_db
10
  PROJECT_ROOT = Path(__file__).absolute().parents[1].absolute()
11
  sys.path.insert(0, str(PROJECT_ROOT))
12
 
 
14
  from preprocess.humanparsing.run_parsing import Parsing
15
  from ootd.inference_ootd_hd import OOTDiffusionHD
16
  from ootd.inference_ootd_dc import OOTDiffusionDC
17
+ from preprocess.openpose.annotator.openpose.util import draw_bodypose
18
+
19
+ # Set default dtype to float64
20
+ # torch.set_default_dtype(torch.float16)
21
 
22
 
23
  openpose_model_hd = OpenPose(0)
24
  parsing_model_hd = Parsing(0)
25
  ootd_model_hd = OOTDiffusionHD(0)
26
 
27
+ openpose_model_dc = OpenPose(0)
28
+ parsing_model_dc = Parsing(0)
29
+ ootd_model_dc = OOTDiffusionDC(0)
30
 
31
 
32
  category_dict = ['upperbody', 'lowerbody', 'dress']
 
34
 
35
 
36
  example_path = os.path.join(os.path.dirname(__file__), 'examples')
37
+ garment_path = os.path.join(os.path.dirname(__file__), 'examples','garment')
38
  model_hd = os.path.join(example_path, 'model/model_1.png')
39
  garment_hd = os.path.join(example_path, 'garment/03244_00.jpg')
40
+
41
  model_dc = os.path.join(example_path, 'model/model_8.png')
42
  garment_dc = os.path.join(example_path, 'garment/048554_1.jpg')
43
 
44
+ openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
45
+ ootd_model_dc.pipe.to('cuda')
46
+ ootd_model_dc.image_encoder.to('cuda')
47
+ ootd_model_dc.text_encoder.to('cuda')
48
 
49
+ def convert_to_image(image_array):
50
+ if isinstance(image_array, np.ndarray):
51
+ # Normalize the data to the range [0, 255]
52
+ image_array = 255 * (image_array - np.min(image_array)) / (np.max(image_array) - np.min(image_array))
53
+ # Convert to uint8
54
+ image_array = image_array.astype(np.uint8)
55
+ return Image.fromarray(image_array)
56
+ else:
57
+ # Convert to NumPy array first if necessary
58
+ image_array = np.array(image_array)
59
+ # Normalize and convert to uint8
60
+ image_array = 255 * (image_array - np.min(image_array)) / (np.max(image_array) - np.min(image_array))
61
+ image_array = image_array.astype(np.uint8)
62
+ return Image.fromarray(image_array)
63
+
64
+ # import spaces
65
 
66
+ # @spaces.GPU
67
  def process_hd(vton_img, garm_img, n_samples, n_steps, image_scale, seed):
68
  model_type = 'hd'
69
  category = 0 # 0:upperbody; 1:lowerbody; 2:dress
 
100
 
101
  return images
102
 
103
+
104
+
105
+ # @spaces.GPU
106
  def process_dc(vton_img, garm_img, category):
107
  model_type = 'dc'
108
  if category == 'Upper-body':
 
113
  category =2
114
 
115
  with torch.no_grad():
116
+ # openpose_model_dc.preprocessor.body_estimation.model.to('cuda')
117
+ # ootd_model_dc.pipe.to('cuda')
118
+ # ootd_model_dc.image_encoder.to('cuda')
119
+ # ootd_model_dc.text_encoder.to('cuda')
120
 
121
  garm_img = Image.open(garm_img).resize((768, 1024))
122
  vton_img = Image.open(vton_img).resize((768, 1024))
123
+ keypoints ,candidate , subset = openpose_model_dc(vton_img.resize((384, 512)))
124
+
125
+ # print(len(keypoints["pose_keypoints_2d"]))
126
+ # print(keypoints["pose_keypoints_2d"])
127
+
128
+ # person_image = np.asarray(vton_img)
129
+
130
+
131
+ # print(len(person_image))
132
+
133
+
134
+ # person_image = np.asarray(Image.open(vton_img).resize((768, 1024)))
135
+
136
+ # output = draw_bodypose(canvas=person_image,candidate=candidate, subset=subset )
137
+ # output_image = Image.fromarray(output)
138
+ # output_image.save('keypose.png')
139
+
140
+
141
+
142
+ left_point = keypoints["pose_keypoints_2d"][2]
143
+ right_point = keypoints["pose_keypoints_2d"][5]
144
+
145
+ neck_point = keypoints["pose_keypoints_2d"][1]
146
+ hip_point = keypoints["pose_keypoints_2d"][8]
147
+
148
+
149
+
150
+ print(f'left shoulder - {left_point}')
151
+ print(f'right shoulder - {right_point}')
152
+
153
+ # find the shoulder distance using Euclidean distance
154
+ shoulder_width_pixels = round(np.sqrt( np.power((right_point[0]-left_point[0]),2) + np.power((right_point[1]-left_point[1]),2)),2)
155
+
156
+ height_pixels = round(np.sqrt( np.power((neck_point[0]-hip_point[0]),2) + np.power((neck_point[1]-hip_point[1]),2)),2) *2
157
+
158
+
159
+ # # Assuming an average human height
160
+ average_height_cm = 172.72 *1.5
161
+
162
+ # Conversion factor from pixels to cm
163
+ conversion_factor = average_height_cm / height_pixels
164
+
165
+ # Convert shoulder width to real-world units
166
+ shoulder_width_cm = shoulder_width_pixels * conversion_factor
167
+
168
+ print(f'Shoulder width (in pixels): {shoulder_width_pixels}')
169
+ print(f'Estimated height (in pixels): {height_pixels}')
170
+ print(f'Conversion factor (pixels to cm): {conversion_factor}')
171
+ print(f'Shoulder width (in cm): {shoulder_width_cm}')
172
+ print(f'Shoulder width (in INCH): {round(shoulder_width_cm/2.54,1)}')
173
+
174
+ model_parse, face_mask = parsing_model_dc(vton_img.resize((384, 512)))
175
+
176
+ model_parse_image = convert_to_image(model_parse)
177
+ face_mask_image = convert_to_image(face_mask)
178
+
179
+ # Save the images
180
+ model_parse_image.save('model_parse_image.png')
181
+ face_mask_image.save('face_mask_image.png')
182
+
183
 
184
  mask, mask_gray = get_mask_location(model_type, category_dict_utils[category], model_parse, keypoints)
185
+
186
+ # final_mask = convert_to_image(mask)
187
+ # final_mask.save("final_mask.png")
188
+
189
+ # final_mask_grat = convert_to_image(mask_gray)
190
+ # final_mask_grat.save("final_mask_grat.png")
191
+
192
  mask = mask.resize((768, 1024), Image.NEAREST)
193
  mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)
194
+ # Save the resized masks
195
+ mask.save("mask_resized.png")
196
+ mask_gray.save("mask_gray_resized.png")
197
 
198
  masked_vton_img = Image.composite(mask_gray, vton_img, mask)
199
+ masked_vton_img.save("masked_vton_img.png")
200
 
201
  images = ootd_model_dc(
202
  model_type=model_type,
 
206
  mask=mask,
207
  image_ori=vton_img,
208
  num_samples=1,
209
+ num_steps=10,
210
+ image_scale= 2.0,
211
  seed=-1,
212
  )
213
 
214
  return images
215
 
216
+ # is_upper = False
217
+
218
 
219
  block = gr.Blocks().queue()
220
  with block:
221
  with gr.Row():
222
  gr.Markdown("# ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  with gr.Row():
225
  gr.Markdown("## Virtual Trial Room")
226
+ # with gr.Row():
227
+ # gr.Markdown("")
228
  with gr.Row():
229
  with gr.Column():
230
  vton_img_dc = gr.Image(label="Model", sources='upload', type="filepath", height=384, value=model_dc)
231
+
232
+ # Hidden component to store is_upper value
233
+ # is_upper = gr.State(value=True)
234
+
235
+ # #set is_upper variable to True when user selects examples from gr.examples upper/lower body
236
+ # def check_image_type(image_path):
237
+ # if image_path:
238
+ # filename = os.path.basename(image_path)
239
+ # image_type = modeL_db.get(filename, "no-dress") # Default to "no-dress" if not found
240
+ # return image_type == "no-dress"
241
+ # return False # Default to True if no image
242
+
243
+ # vton_img_dc.change(fn=check_image_type, inputs=vton_img_dc)
244
+
245
+
246
+
247
+
248
+
249
+ example = gr.Examples(
250
+ label="Select for Upper/Lower Body",
251
+ inputs=vton_img_dc,
252
+ examples_per_page=7,
253
+ examples=[
254
+ os.path.join(example_path, 'model/model_8.png'),
255
+ os.path.join(example_path, 'model/049447_0.jpg'),
256
+ os.path.join(example_path, 'model/049713_0.jpg'),
257
+ os.path.join(example_path, 'model/051482_0.jpg'),
258
+ os.path.join(example_path, 'model/051918_0.jpg'),
259
+ os.path.join(example_path, 'model/051962_0.jpg'),
260
+ os.path.join(example_path, 'model/049205_0.jpg'),
261
+ ],
262
+
263
+ )
264
+
265
+
266
+ example = gr.Examples(
267
+ label="Select for Full Body Dress",
268
+ inputs=vton_img_dc,
269
+ examples_per_page=7,
270
+ examples=[
271
+ os.path.join(example_path, 'model/model_9.png'),
272
+ os.path.join(example_path, 'model/052767_0.jpg'),
273
+ os.path.join(example_path, 'model/052472_0.jpg'),
274
+ os.path.join(example_path, 'model/053514_0.jpg'),
275
+ os.path.join(example_path, 'model/053228_0.jpg'),
276
+ os.path.join(example_path, 'model/052964_0.jpg'),
277
+ os.path.join(example_path, 'model/053700_0.jpg'),
278
+ ],
279
+
280
+ )
281
  with gr.Column():
282
  garm_img_dc = gr.Image(label="Garment", sources='upload', type="filepath", height=384, value=garment_dc)
283
  category_dc = gr.Dropdown(label="Garment category (important option!!!)", choices=["Upper-body", "Lower-body", "Dress"], value="Upper-body")
284
+
285
+ def update_category(image_path):
286
+ if image_path:
287
+ filename = os.path.basename(image_path)
288
+ return cloths_map.get(filename, None) # Return None if not found
289
+ return None # Return None if no image
290
+
291
+ garm_img_dc.change(fn=update_category, inputs=garm_img_dc, outputs=category_dc)
292
  example = gr.Examples(
293
  label="Examples (upper-body)",
294
  inputs=garm_img_dc,
295
+
296
+
297
+
298
+
299
  examples_per_page=7,
300
  examples=[
301
+ os.path.join(garment_path,'01260_00.jpg'),
302
+ os.path.join(garment_path,'01430_00.jpg'),
303
+ os.path.join(garment_path,'02783_00.jpg'),
304
+ os.path.join(garment_path,'03751_00.jpg'),
305
+ os.path.join(garment_path,'06429_00.jpg'),
306
+ os.path.join(garment_path,'06802_00.jpg'),
307
+ os.path.join(garment_path,'07429_00.jpg'),
308
+ os.path.join(garment_path,'08348_00.jpg'),
309
+ os.path.join(garment_path,'09933_00.jpg'),
310
+ os.path.join(garment_path,'11028_00.jpg'),
311
+ os.path.join(garment_path,'11351_00.jpg'),
312
+ os.path.join(garment_path,'11791_00.jpg'),
313
+ os.path.join(garment_path, '048554_1.jpg'),
314
+ os.path.join(garment_path, '049920_1.jpg'),
315
+ os.path.join(garment_path, '049965_1.jpg'),
316
+ os.path.join(garment_path, '049949_1.jpg'),
317
+ os.path.join(garment_path, '050181_1.jpg'),
318
+ os.path.join(garment_path, '049805_1.jpg'),
319
+ os.path.join(garment_path, '050105_1.jpg'),
320
+ os.path.join(garment_path, 'male_tshirt1.png'),
321
  ])
322
  example = gr.Examples(
323
  label="Examples (lower-body)",
324
  inputs=garm_img_dc,
325
  examples_per_page=7,
326
  examples=[
327
+ os.path.join(garment_path, '051827_1.jpg'),
328
+ os.path.join(garment_path, '051946_1.jpg'),
329
+ os.path.join(garment_path, '051473_1.jpg'),
330
+ os.path.join(garment_path, '051515_1.jpg'),
331
+ os.path.join(garment_path, '051517_1.jpg'),
332
+ os.path.join(garment_path, '051988_1.jpg'),
333
+ os.path.join(garment_path, '051412_1.jpg'),
334
  ])
335
+
336
+
337
+
338
  example = gr.Examples(
339
  label="Examples (dress)",
340
  inputs=garm_img_dc,
341
  examples_per_page=7,
342
  examples=[
343
+ os.path.join(garment_path, '053290_1.jpg'),
344
+ os.path.join(garment_path, '053744_1.jpg'),
345
+ os.path.join(garment_path, '053742_1.jpg'),
346
+ os.path.join(garment_path, '053786_1.jpg'),
347
+ os.path.join(garment_path, '053790_1.jpg'),
348
+ os.path.join(garment_path, '053319_1.jpg'),
349
+ os.path.join(garment_path, '052234_1.jpg'),
350
  ])
351
  with gr.Column():
352
  result_gallery_dc = gr.Gallery(label='Output', show_label=False, elem_id="gallery", preview=True, scale=1)
 
357
  # scale_dc = gr.Slider(label="Scale", minimum=1.0, maximum=12.0, value=5.0, step=0.1)
358
  # image_scale_dc = gr.Slider(label="Guidance scale", minimum=1.0, maximum=5.0, value=2.0, step=0.1)
359
  # seed_dc = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, value=-1)
360
+
361
+ # ips_dc = [vton_img_dc, garm_img_dc, category_dc]
362
+
363
+
364
+ ips_dc = [vton_img_dc, garm_img_dc ,category_dc]
365
  run_button_dc.click(fn=process_dc, inputs=ips_dc, outputs=[result_gallery_dc])
366
 
367
 
368
+ block.launch(server_name="0.0.0.0", server_port=7860 )
test_df.py ADDED
@@ -0,0 +1,24 @@
1
+ import torch
2
+
3
+ def check_bf16_support():
4
+ if not torch.cuda.is_available():
5
+ print("CUDA is not available on this system.")
6
+ return False
7
+
8
+ device = torch.device("cuda")
9
+ capability = torch.cuda.get_device_capability(device)
10
+
11
+ # As of now, GPUs with compute capability >= 8.0 support BF16
12
+ # Example: NVIDIA A100 has compute capability 8.0
13
+ bf16_supported = capability[0] >= 8
14
+
15
+ print(f"GPU Compute Capability: {capability}")
16
+ if bf16_supported:
17
+ print("BF16 is supported on this GPU.")
18
+ else:
19
+ print("BF16 is not supported on this GPU.")
20
+
21
+ return bf16_supported
22
+
23
+ # Check if BF16 is supported
24
+ check_bf16_support()
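The compute-capability heuristic in test_df.py matches Ampere-and-newer GPUs; recent PyTorch releases also expose a built-in check with the same intent, which avoids hard-coding the threshold (assumes PyTorch >= 1.10):

import torch

if torch.cuda.is_available():
    # Built-in counterpart of the capability[0] >= 8 heuristic above.
    print("BF16 supported:", torch.cuda.is_bf16_supported())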