Upload folder using huggingface_hub
- .ipynb_checkpoints/app-checkpoint.py +1 -1
- .ipynb_checkpoints/call_api-checkpoint.py +41 -0
- .ipynb_checkpoints/handler-checkpoint.py +25 -0
- .ipynb_checkpoints/helpers-checkpoint.py +236 -0
- .ipynb_checkpoints/streamlit_code-checkpoint.py +81 -0
- app.py +1 -1
- call_api.py +41 -0
- handler.py +25 -0
- helpers.py +236 -0
- maskask.webp +0 -0
- output.webp +0 -0
- streamlit_code.py +81 -0
.ipynb_checkpoints/app-checkpoint.py
CHANGED
@@ -309,5 +309,5 @@ with image_blocks as demo:
-image_blocks.launch(
+image_blocks.launch()
.ipynb_checkpoints/call_api-checkpoint.py
ADDED
@@ -0,0 +1,41 @@
(Jupyter checkpoint copy; contents identical to call_api.py below.)
.ipynb_checkpoints/handler-checkpoint.py
ADDED
@@ -0,0 +1,25 @@
(Jupyter checkpoint copy; contents identical to handler.py below.)
.ipynb_checkpoints/helpers-checkpoint.py
ADDED
@@ -0,0 +1,236 @@
(Jupyter checkpoint copy; contents identical to helpers.py below.)
.ipynb_checkpoints/streamlit_code-checkpoint.py
ADDED
@@ -0,0 +1,81 @@
(Jupyter checkpoint copy; contents identical to streamlit_code.py below.)
app.py
CHANGED
@@ -309,5 +309,5 @@ with image_blocks as demo:
-image_blocks.launch(
+image_blocks.launch()
call_api.py
ADDED
@@ -0,0 +1,41 @@
+import runpod
+import json
+import os
+import requests
+import time
+
+# json test file name
+json_test_file = "test_input.json"
+
+# open test file
+with open(json_test_file) as f:
+    input = json.load(f)
+
+# load runpod api key and serverless model id
+runpod.api_key = ""
+RUNPOD_MODEL_ID = ""
+
+endpoint = runpod.Endpoint(RUNPOD_MODEL_ID)
+
+start = time.time()
+
+# First way to call serverless api
+run_request = endpoint.run_sync(input)
+print(run_request)
+
+end = time.time()
+print('Time taken: ', end-start)
+
+# Second way to call serverless api
+url = f'https://api.runpod.ai/v2/{RUNPOD_MODEL_ID}/run_sync'  # or change to runsync
+headers = {
+    'accept': 'application/json',
+    'Content-Type': 'application/json',
+    'Authorization': f'Bearer {runpod.api_key}'
+}
+start = time.time()
+response = requests.post(url, headers=headers, data=json.dumps(input))
+print('\n')
+print(response.json())
+end = time.time()
+print('Time taken: ', end-start)
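call_api.py loads its request body from test_input.json, and handler.py reads the image fields from job['input']. The commit does not include test_input.json, so the following is only a sketch of what such a payload could look like; the file names and placeholder values are assumptions, and the keys mirror what handler.py reads.

import base64
import json

# Hypothetical script to build test_input.json for call_api.py / handler.py.
# "human.jpg" and "garment.jpg" are placeholder input files, not part of this commit.
with open("human.jpg", "rb") as f:
    human_b64 = base64.b64encode(f.read()).decode("utf-8")
with open("garment.jpg", "rb") as f:
    garm_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "input": {
        "human_img_b64": human_b64,        # required by handler.py
        "garm_img_b64": garm_b64,          # required by handler.py
        "denoise_steps": 30,               # optional; handler.py falls back to 30
        "seed": 42,                        # optional; handler.py falls back to 42
        "is_checked_crop": False,          # optional; handler.py falls back to False
        "garment_des": "a red t-shirt",    # optional; handler.py falls back to ""
    }
}

with open("test_input.json", "w") as f:
    json.dump(payload, f)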
handler.py
ADDED
@@ -0,0 +1,25 @@
+import runpod
+from helpers import prepare_pipeline, get_result, b64_to_pil
+import base64
+from PIL import Image
+
+def handler(job):
+    human_img_b64 = job['input']['human_img_b64']
+    human_img = b64_to_pil(human_img_b64)
+
+    garm_img_b64 = job['input']['garm_img_b64']
+    garm_img = b64_to_pil(garm_img_b64)
+
+    denoise_steps = job['input'].get('denoise_steps') if job['input'].get('denoise_steps') else 30
+
+    seed = job['input'].get('seed') if job['input'].get('seed') else 42
+
+    is_checked_crop = job['input'].get('is_checked_crop') if job['input'].get('is_checked_crop') else False
+
+    garment_des = job['input'].get('garment_des') if job['input'].get('garment_des') else ""
+
+    result = get_result(PIPE, human_img, garm_img, denoise_steps, seed, is_checked_crop, garment_des)
+    return pil_to_b64(result)
+
+PIPE = prepare_pipeline()
+runpod.serverless.start({"handler": handler})
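handler.py returns pil_to_b64(result), but no such helper is defined in this commit (helpers.py only provides b64_to_pil). A minimal sketch of what that helper could look like, assuming a PIL image in and a base64 string out; this is an assumption, not part of the uploaded code.

import base64
from io import BytesIO
from PIL import Image

def pil_to_b64(image: Image.Image) -> str:
    # Serialize the PIL image to PNG in memory, then base64-encode the bytes.
    buffer = BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

Note also that helpers.get_result takes (human_img, garm_img, ...) and returns an (image, mask) pair, while prepare_pipeline returns nothing, so the handler's get_result(PIPE, ...) call and its single return value would need to be reconciled with those signatures before deployment.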
helpers.py
ADDED
@@ -0,0 +1,236 @@
+import base64
+from io import BytesIO
+from PIL import Image
+from src.tryon_pipeline import StableDiffusionXLInpaintPipeline as TryonPipeline
+from src.unet_hacked_garmnet import UNet2DConditionModel as UNet2DConditionModel_ref
+from src.unet_hacked_tryon import UNet2DConditionModel
+from transformers import (
+    CLIPImageProcessor,
+    CLIPVisionModelWithProjection,
+    CLIPTextModel,
+    CLIPTextModelWithProjection,
+)
+from diffusers import DDPMScheduler, AutoencoderKL
+from typing import List
+
+import torch
+import os
+from transformers import AutoTokenizer
+import numpy as np
+from utils_mask import get_mask_location
+from torchvision import transforms
+import apply_net
+from preprocess.humanparsing.run_parsing import Parsing
+from preprocess.openpose.run_openpose import OpenPose
+from detectron2.data.detection_utils import convert_PIL_to_numpy, _apply_exif_orientation
+from torchvision.transforms.functional import to_pil_image
+
+
+def b64_to_pil(base64_string):
+    # Decode the base64 string
+    image_data = base64.b64decode(base64_string)
+
+    # Create a PIL Image object from the decoded image data
+    image = Image.open(BytesIO(image_data))
+    return image
+
+def prepare_pipeline():
+    pass
+
+base_path = 'yisol/IDM-VTON'
+example_path = os.path.join(os.path.dirname(__file__), 'example')
+
+unet = UNet2DConditionModel.from_pretrained(
+    base_path,
+    subfolder="unet",
+    torch_dtype=torch.float16,
+)
+unet.requires_grad_(False)
+tokenizer_one = AutoTokenizer.from_pretrained(
+    base_path,
+    subfolder="tokenizer",
+    revision=None,
+    use_fast=False,
+)
+tokenizer_two = AutoTokenizer.from_pretrained(
+    base_path,
+    subfolder="tokenizer_2",
+    revision=None,
+    use_fast=False,
+)
+noise_scheduler = DDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
+
+text_encoder_one = CLIPTextModel.from_pretrained(
+    base_path,
+    subfolder="text_encoder",
+    torch_dtype=torch.float16,
+)
+text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
+    base_path,
+    subfolder="text_encoder_2",
+    torch_dtype=torch.float16,
+)
+image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+    base_path,
+    subfolder="image_encoder",
+    torch_dtype=torch.float16,
+)
+vae = AutoencoderKL.from_pretrained(base_path,
+    subfolder="vae",
+    torch_dtype=torch.float16,
+)
+
+# "stabilityai/stable-diffusion-xl-base-1.0",
+UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(
+    base_path,
+    subfolder="unet_encoder",
+    torch_dtype=torch.float16,
+)
+
+parsing_model = Parsing(0)
+openpose_model = OpenPose(0)
+
+UNet_Encoder.requires_grad_(False)
+image_encoder.requires_grad_(False)
+vae.requires_grad_(False)
+unet.requires_grad_(False)
+text_encoder_one.requires_grad_(False)
+text_encoder_two.requires_grad_(False)
+tensor_transfrom = transforms.Compose(
+    [
+        transforms.ToTensor(),
+        transforms.Normalize([0.5], [0.5]),
+    ]
+)
+
+pipe = TryonPipeline.from_pretrained(
+    base_path,
+    unet=unet,
+    vae=vae,
+    feature_extractor=CLIPImageProcessor(),
+    text_encoder=text_encoder_one,
+    text_encoder_2=text_encoder_two,
+    tokenizer=tokenizer_one,
+    tokenizer_2=tokenizer_two,
+    scheduler=noise_scheduler,
+    image_encoder=image_encoder,
+    torch_dtype=torch.float16,
+)
+pipe.unet_encoder = UNet_Encoder
+
+def get_result(human_img, garm_img, body_part="upper_body", denoise_steps=30, seed=42, is_checked_crop=False, garment_des=""):
+    device = "cuda"
+
+    openpose_model.preprocessor.body_estimation.model.to(device)
+    pipe.to(device)
+    pipe.unet_encoder.to(device)
+
+    garm_img = garm_img.convert("RGB").resize((768, 1024))
+    human_img_orig = human_img
+
+    if is_checked_crop:
+        width, height = human_img_orig.size
+        target_width = int(min(width, height * (3 / 4)))
+        target_height = int(min(height, width * (4 / 3)))
+        left = (width - target_width) / 2
+        top = (height - target_height) / 2
+        right = (width + target_width) / 2
+        bottom = (height + target_height) / 2
+        cropped_img = human_img_orig.crop((left, top, right, bottom))
+        crop_size = cropped_img.size
+        human_img = cropped_img.resize((768, 1024))
+    else:
+        human_img = human_img_orig.resize((768, 1024))
+
+    keypoints = openpose_model(human_img.resize((384, 512)))
+    model_parse, _ = parsing_model(human_img.resize((384, 512)))
+    mask, mask_gray = get_mask_location('hd', body_part, model_parse, keypoints)
+    mask = mask.resize((768, 1024))
+
+    mask_gray = (1 - transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
+    mask_gray = to_pil_image((mask_gray + 1.0) / 2.0)
+
+    human_img_arg = _apply_exif_orientation(human_img.resize((384, 512)))
+    human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
+
+    args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
+    # verbosity = getattr(args, "verbosity", None)
+    pose_img = args.func(args, human_img_arg)
+    pose_img = pose_img[:, :, ::-1]
+    pose_img = Image.fromarray(pose_img).resize((768, 1024))
+
+    with torch.no_grad():
+        # Extract the images
+        with torch.cuda.amp.autocast():
+            with torch.no_grad():
+                prompt = "model is wearing " + garment_des
+                negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
+                with torch.inference_mode():
+                    (
+                        prompt_embeds,
+                        negative_prompt_embeds,
+                        pooled_prompt_embeds,
+                        negative_pooled_prompt_embeds,
+                    ) = pipe.encode_prompt(
+                        prompt,
+                        num_images_per_prompt=1,
+                        do_classifier_free_guidance=True,
+                        negative_prompt=negative_prompt,
+                    )
+
+                    prompt = "a photo of " + garment_des
+                    negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
+                    if not isinstance(prompt, List):
+                        prompt = [prompt] * 1
+                    if not isinstance(negative_prompt, List):
+                        negative_prompt = [negative_prompt] * 1
+                    with torch.inference_mode():
+                        (
+                            prompt_embeds_c,
+                            _,
+                            _,
+                            _,
+                        ) = pipe.encode_prompt(
+                            prompt,
+                            num_images_per_prompt=1,
+                            do_classifier_free_guidance=False,
+                            negative_prompt=negative_prompt,
+                        )
+
+                    pose_img = tensor_transfrom(pose_img).unsqueeze(0).to(device, torch.float16)
+                    garm_tensor = tensor_transfrom(garm_img).unsqueeze(0).to(device, torch.float16)
+                    generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
+                    images = pipe(
+                        prompt_embeds=prompt_embeds.to(device, torch.float16),
+                        negative_prompt_embeds=negative_prompt_embeds.to(device, torch.float16),
+                        pooled_prompt_embeds=pooled_prompt_embeds.to(device, torch.float16),
+                        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device, torch.float16),
+                        num_inference_steps=denoise_steps,
+                        generator=generator,
+                        strength=1.0,
+                        pose_img=pose_img.to(device, torch.float16),
+                        text_embeds_cloth=prompt_embeds_c.to(device, torch.float16),
+                        cloth=garm_tensor.to(device, torch.float16),
+                        mask_image=mask,
+                        image=human_img,
+                        height=1024,
+                        width=768,
+                        ip_adapter_image=garm_img.resize((768, 1024)),
+                        guidance_scale=2.0,
+                    )[0]
+
+    if is_checked_crop:
+        out_img = images[0].resize(crop_size)
+        human_img_orig.paste(out_img, (int(left), int(top)))
+        return human_img_orig, mask_gray
+    else:
+        return images[0], mask_gray
+        # return images[0], mask_gray
maskask.webp
ADDED
output.webp
ADDED
streamlit_code.py
ADDED
@@ -0,0 +1,81 @@
+import streamlit as st
+from streamlit_image_select import image_select
+from helpers import get_result
+from PIL import Image
+import numpy as np
+
+# UI configurations
+st.set_page_config(page_title="AIFR - Demo",
+                   page_icon=":bridge_at_night:",
+                   layout="wide")
+st.markdown("# :rainbow[AIFR - Demo]")
+# 4 columns
+col1, col2, col3, col4 = st.columns(4)
+
+with col1:
+    st.header("User Image")
+
+    user_image_holder = st.empty()
+    # upload file
+    user_image = st.file_uploader("Upload User Image")
+    if user_image is not None:
+        img = None
+        user_image_holder.image(user_image, use_column_width=True)
+
+    # st.write("Examples")
+    # img1 = image_select(
+    #     label="Select a cat",
+    #     images=[
+    #         "example1.jpg",
+    #         "example2.jpg"
+    #     ],
+    #     captions=["A cat", "Another cat"],
+    # )
+    # if img1 and user_image is None:
+    #     user_image = img1
+    #     user_image_holder.image(user_image, use_column_width=True)
+
+with col2:
+    st.header("Clothes Image")
+
+    clothes_image_holder = st.empty()
+    # upload file
+    clothes_image = st.file_uploader("Upload Clothes Image")
+    if clothes_image is not None:
+        clothes_image_holder.image(clothes_image, use_column_width=True)
+
+    # st.write("Examples")
+    # img2 = image_select(
+    #     label="Select a dress",
+    #     images=[
+    #         "https://bagongkia.github.io/react-image-picker/0759b6e526e3c6d72569894e58329d89.jpg",
+    #         "https://bagongkia.github.io/react-image-picker/0759b6e526e3c6d72569894e58329d89.jpg"
+    #     ],
+    #     captions=["A dress", "Another dress"],
+    # )
+
+    # if img2 and clothes_image is None:
+    #     clothes_image = img2
+    #     clothes_image_holder.image(clothes_image, use_column_width=True)
+    body_part = st.selectbox(
+        "Choose your body part",
+        ("dresses", "upper_body", "lower_body"))
+    submitted = st.button("Get result", use_container_width=True, type="primary")
+    output_image = mask_image = None
+
+    if submitted:
+        user_image = Image.open(user_image)
+        clothes_image = Image.open(clothes_image)
+        output_image, mask_image = get_result(user_image, clothes_image, body_part=body_part)
+
+with col3:
+    st.header("Masked Image output")
+    if submitted:
+        if mask_image is not None:
+            st.image(mask_image, use_column_width=True)
+
+with col4:
+    st.header("Output")
+    if submitted:
+        if output_image is not None:
+            st.image(output_image, use_column_width=True)