Spaces:

silentchen
/

test

Paused

App Files Files Community

silentchen commited on Mar 24, 2023

Commit

3ab28ab

1 Parent(s): eea38fb

first commit

Browse files

Files changed (5) hide show

DejaVuSansMono.ttf +0 -0
app.py +748 -0
images/hello_kitty_results.png +0 -0
images/input.png +0 -0
requirements.txt +18 -0

DejaVuSansMono.ttf ADDED Viewed

Binary file (341 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,748 @@

+import gradio as gr
+import torch
+from omegaconf import OmegaConf
+# from gligen.task_grounded_generation import grounded_generation_box, load_ckpt, load_common_ckpt
+import json
+import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+from functools import partial
+from collections import Counter
+import math
+import gc
+from gradio import processing_utils
+from typing import Optional
+import warnings
+from datetime import datetime
+from huggingface_hub import hf_hub_download
+hf_hub_download = partial(hf_hub_download, library_name="gligen_demo")
+import sys
+sys.tracebacklimit = 0
+def load_from_hf(repo_id, filename='diffusion_pytorch_model.bin', subfolder=None):
+    cache_file = hf_hub_download(repo_id=repo_id, filename=filename, subfolder=subfolder)
+    return torch.load(cache_file, map_location='cpu')
+def load_ckpt_config_from_hf(modality):
+    ckpt = load_from_hf('gligen/demo_ckpts_legacy', filename=f'{modality}.pth', subfolder='model')
+    config = load_from_hf('gligen/demo_ckpts_legacy', filename=f'{modality}.pth', subfolder='config')
+    return ckpt, config
+def ckpt_load_helper(modality, is_inpaint, is_style, common_instances=None):
+    pretrained_ckpt_gligen, config = load_ckpt_config_from_hf(modality)
+    config = OmegaConf.create(config["_content"])  # config used in training
+    config.alpha_scale = 1.0
+    config.model['params']['is_inpaint'] = is_inpaint
+    config.model['params']['is_style'] = is_style
+    if common_instances is None:
+        common_ckpt = load_from_hf('gligen/demo_ckpts_legacy', filename=f'common.pth', subfolder='model')
+        common_instances = load_common_ckpt(config, common_ckpt)
+    loaded_model_list = load_ckpt(config, pretrained_ckpt_gligen, common_instances)
+    return loaded_model_list, common_instances
+class Instance:
+    def __init__(self, capacity=2):
+        self.model_type = 'base'
+        self.loaded_model_list = {}
+        self.counter = Counter()
+        self.global_counter = Counter()
+        self.loaded_model_list['base'], self.common_instances = ckpt_load_helper(
+            'gligen-generation-text-box',
+            is_inpaint=False, is_style=False, common_instances=None
+        )
+        self.capacity = capacity
+    def _log(self, model_type, batch_size, instruction, phrase_list):
+        self.counter[model_type] += 1
+        self.global_counter[model_type] += 1
+        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        print('[{}] Current: {}, All: {}. Samples: {}, prompt: {}, phrases: {}'.format(
+            current_time, dict(self.counter), dict(self.global_counter), batch_size, instruction, phrase_list
+        ))
+    def get_model(self, model_type, batch_size, instruction, phrase_list):
+        if model_type in self.loaded_model_list:
+            self._log(model_type, batch_size, instruction, phrase_list)
+            return self.loaded_model_list[model_type]
+        if self.capacity == len(self.loaded_model_list):
+            least_used_type = self.counter.most_common()[-1][0]
+            del self.loaded_model_list[least_used_type]
+            del self.counter[least_used_type]
+            gc.collect()
+            torch.cuda.empty_cache()
+        self.loaded_model_list[model_type] = self._get_model(model_type)
+        self._log(model_type, batch_size, instruction, phrase_list)
+        return self.loaded_model_list[model_type]
+    def _get_model(self, model_type):
+        if model_type == 'base':
+            return ckpt_load_helper(
+                'gligen-generation-text-box',
+                is_inpaint=False, is_style=False, common_instances=self.common_instances
+            )[0]
+        elif model_type == 'inpaint':
+            return ckpt_load_helper(
+                'gligen-inpainting-text-box',
+                is_inpaint=True, is_style=False, common_instances=self.common_instances
+            )[0]
+        elif model_type == 'style':
+            return ckpt_load_helper(
+                'gligen-generation-text-image-box',
+                is_inpaint=False, is_style=True, common_instances=self.common_instances
+            )[0]
+        assert False
+# instance = Instance()
+def load_clip_model():
+    from transformers import CLIPProcessor, CLIPModel
+    version = "openai/clip-vit-large-patch14"
+    model = CLIPModel.from_pretrained(version).cuda()
+    processor = CLIPProcessor.from_pretrained(version)
+    return {
+        'version': version,
+        'model': model,
+        'processor': processor,
+    }
+# clip_model = load_clip_model()
+class ImageMask(gr.components.Image):
+    """
+    Sets: source="canvas", tool="sketch"
+    """
+    is_template = True
+    def __init__(self, **kwargs):
+        super().__init__(source="upload", tool="sketch", interactive=True, **kwargs)
+    def preprocess(self, x):
+        if x is None:
+            return x
+        if self.tool == "sketch" and self.source in ["upload", "webcam"] and type(x) != dict:
+            decode_image = processing_utils.decode_base64_to_image(x)
+            width, height = decode_image.size
+            mask = np.zeros((height, width, 4), dtype=np.uint8)
+            mask[..., -1] = 255
+            mask = self.postprocess(mask)
+            x = {'image': x, 'mask': mask}
+        return super().preprocess(x)
+class Blocks(gr.Blocks):
+    def __init__(
+            self,
+            theme: str = "default",
+            analytics_enabled: Optional[bool] = None,
+            mode: str = "blocks",
+            title: str = "Gradio",
+            css: Optional[str] = None,
+            **kwargs,
+    ):
+        self.extra_configs = {
+            'thumbnail': kwargs.pop('thumbnail', ''),
+            'url': kwargs.pop('url', 'https://gradio.app/'),
+            'creator': kwargs.pop('creator', '@teamGradio'),
+        }
+        super(Blocks, self).__init__(theme, analytics_enabled, mode, title, css, **kwargs)
+        warnings.filterwarnings("ignore")
+    def get_config_file(self):
+        config = super(Blocks, self).get_config_file()
+        for k, v in self.extra_configs.items():
+            config[k] = v
+        return config
+'''
+inference model
+'''
+@torch.no_grad()
+def inference(task, language_instruction, grounding_instruction, inpainting_boxes_nodrop, image,
+              alpha_sample, guidance_scale, batch_size,
+              fix_seed, rand_seed, actual_mask, style_image,
+              *args, **kwargs):
+    grounding_instruction = json.loads(grounding_instruction)
+    phrase_list, location_list = [], []
+    for k, v in grounding_instruction.items():
+        phrase_list.append(k)
+        location_list.append(v)
+    placeholder_image = Image.open('images/teddy.jpg').convert("RGB")
+    image_list = [placeholder_image] * len(phrase_list)  # placeholder input for visual prompt, which is disabled
+    batch_size = int(batch_size)
+    if not 1 <= batch_size <= 4:
+        batch_size = 2
+    if style_image == None:
+        has_text_mask = 1
+        has_image_mask = 0  # then we hack above 'image_list'
+    else:
+        valid_phrase_len = len(phrase_list)
+        phrase_list += ['placeholder']
+        has_text_mask = [1] * valid_phrase_len + [0]
+        image_list = [placeholder_image] * valid_phrase_len + [style_image]
+        has_image_mask = [0] * valid_phrase_len + [1]
+        location_list += [[0.0, 0.0, 1, 0.01]]  # style image grounding location
+    if task == 'Grounded Inpainting':
+        alpha_sample = 1.0
+    instruction = dict(
+        prompt=language_instruction,
+        phrases=phrase_list,
+        images=image_list,
+        locations=location_list,
+        alpha_type=[alpha_sample, 0, 1.0 - alpha_sample],
+        has_text_mask=has_text_mask,
+        has_image_mask=has_image_mask,
+        save_folder_name=language_instruction,
+        guidance_scale=guidance_scale,
+        batch_size=batch_size,
+        fix_seed=bool(fix_seed),
+        rand_seed=int(rand_seed),
+        actual_mask=actual_mask,
+        inpainting_boxes_nodrop=inpainting_boxes_nodrop,
+    )
+    get_model = partial(instance.get_model,
+                        batch_size=batch_size,
+                        instruction=language_instruction,
+                        phrase_list=phrase_list)
+    with torch.autocast(device_type='cuda', dtype=torch.float16):
+        if task == 'Grounded Generation':
+            if style_image == None:
+                return grounded_generation_box(get_model('base'), instruction, *args, **kwargs)
+            else:
+                return grounded_generation_box(get_model('style'), instruction, *args, **kwargs)
+        elif task == 'Grounded Inpainting':
+            assert image is not None
+            instruction['input_image'] = image.convert("RGB")
+            return grounded_generation_box(get_model('inpaint'), instruction, *args, **kwargs)
+def draw_box(boxes=[], texts=[], img=None):
+    if len(boxes) == 0 and img is None:
+        return None
+    if img is None:
+        img = Image.new('RGB', (512, 512), (255, 255, 255))
+    colors = ["red", "olive", "blue", "green", "orange", "brown", "cyan", "purple"]
+    draw = ImageDraw.Draw(img)
+    font = ImageFont.truetype("DejaVuSansMono.ttf", size=18)
+    print(boxes)
+    for bid, box in enumerate(boxes):
+        draw.rectangle([box[0], box[1], box[2], box[3]], outline=colors[bid % len(colors)], width=4)
+        anno_text = texts[bid]
+        draw.rectangle(
+            [box[0], box[3] - int(font.size * 1.2), box[0] + int((len(anno_text) + 0.8) * font.size * 0.6), box[3]],
+            outline=colors[bid % len(colors)], fill=colors[bid % len(colors)], width=4)
+        draw.text([box[0] + int(font.size * 0.2), box[3] - int(font.size * 1.2)], anno_text, font=font,
+                  fill=(255, 255, 255))
+    return img
+def get_concat(ims):
+    if len(ims) == 1:
+        n_col = 1
+    else:
+        n_col = 2
+    n_row = math.ceil(len(ims) / 2)
+    dst = Image.new('RGB', (ims[0].width * n_col, ims[0].height * n_row), color="white")
+    for i, im in enumerate(ims):
+        row_id = i // n_col
+        col_id = i % n_col
+        dst.paste(im, (im.width * col_id, im.height * row_id))
+    return dst
+def auto_append_grounding(language_instruction, grounding_texts):
+    for grounding_text in grounding_texts:
+        if grounding_text not in language_instruction and grounding_text != 'auto':
+            language_instruction += "; " + grounding_text
+    return language_instruction
+def generate(task, language_instruction, grounding_texts, sketch_pad,
+             alpha_sample, guidance_scale, batch_size,
+             fix_seed, rand_seed, use_actual_mask, append_grounding, style_cond_image,
+             state):
+    if 'boxes' not in state:
+        state['boxes'] = []
+    boxes = state['boxes']
+    grounding_texts = [x.strip() for x in grounding_texts.split(';')]
+    # assert len(boxes) == len(grounding_texts)
+    if len(boxes) != len(grounding_texts):
+        if len(boxes) < len(grounding_texts):
+            raise ValueError("""The number of boxes should be equal to the number of grounding objects.
+Number of boxes drawn: {}, number of grounding tokens: {}.
+Please draw boxes accordingly on the sketch pad.""".format(len(boxes), len(grounding_texts)))
+        grounding_texts = grounding_texts + [""] * (len(boxes) - len(grounding_texts))
+    boxes = (np.asarray(boxes) / 512).tolist()
+    grounding_instruction = json.dumps({obj: box for obj, box in zip(grounding_texts, boxes)})
+    image = None
+    actual_mask = None
+    if task == 'Grounded Inpainting':
+        image = state.get('original_image', sketch_pad['image']).copy()
+        image = center_crop(image)
+        image = Image.fromarray(image)
+        if use_actual_mask:
+            actual_mask = sketch_pad['mask'].copy()
+            if actual_mask.ndim == 3:
+                actual_mask = actual_mask[..., 0]
+            actual_mask = center_crop(actual_mask, tgt_size=(64, 64))
+            actual_mask = torch.from_numpy(actual_mask == 0).float()
+        if state.get('inpaint_hw', None):
+            boxes = np.asarray(boxes) * 0.9 + 0.05
+            boxes = boxes.tolist()
+            grounding_instruction = json.dumps({obj: box for obj, box in zip(grounding_texts, boxes) if obj != 'auto'})
+    if append_grounding:
+        language_instruction = auto_append_grounding(language_instruction, grounding_texts)
+    gen_images, gen_overlays = inference(
+        task, language_instruction, grounding_instruction, boxes, image,
+        alpha_sample, guidance_scale, batch_size,
+        fix_seed, rand_seed, actual_mask, style_cond_image, clip_model=clip_model,
+    )
+    for idx, gen_image in enumerate(gen_images):
+        if task == 'Grounded Inpainting' and state.get('inpaint_hw', None):
+            hw = min(*state['original_image'].shape[:2])
+            gen_image = sized_center_fill(state['original_image'].copy(), np.array(gen_image.resize((hw, hw))), hw, hw)
+            gen_image = Image.fromarray(gen_image)
+        gen_images[idx] = gen_image
+    blank_samples = batch_size % 2 if batch_size > 1 else 0
+    gen_images = [gr.Image.update(value=x, visible=True) for i, x in enumerate(gen_images)] \
+                 + [gr.Image.update(value=None, visible=True) for _ in range(blank_samples)] \
+                 + [gr.Image.update(value=None, visible=False) for _ in range(4 - batch_size - blank_samples)]
+    return gen_images + [state]
+def binarize(x):
+    return (x != 0).astype('uint8') * 255
+def sized_center_crop(img, cropx, cropy):
+    y, x = img.shape[:2]
+    startx = x // 2 - (cropx // 2)
+    starty = y // 2 - (cropy // 2)
+    return img[starty:starty + cropy, startx:startx + cropx]
+def sized_center_fill(img, fill, cropx, cropy):
+    y, x = img.shape[:2]
+    startx = x // 2 - (cropx // 2)
+    starty = y // 2 - (cropy // 2)
+    img[starty:starty + cropy, startx:startx + cropx] = fill
+    return img
+def sized_center_mask(img, cropx, cropy):
+    y, x = img.shape[:2]
+    startx = x // 2 - (cropx // 2)
+    starty = y // 2 - (cropy // 2)
+    center_region = img[starty:starty + cropy, startx:startx + cropx].copy()
+    img = (img * 0.2).astype('uint8')
+    img[starty:starty + cropy, startx:startx + cropx] = center_region
+    return img
+def center_crop(img, HW=None, tgt_size=(512, 512)):
+    if HW is None:
+        H, W = img.shape[:2]
+        HW = min(H, W)
+    img = sized_center_crop(img, HW, HW)
+    img = Image.fromarray(img)
+    img = img.resize(tgt_size)
+    return np.array(img)
+def draw(task, input, grounding_texts, new_image_trigger, state):
+    if type(input) == dict:
+        image = input['image']
+        mask = input['mask']
+    else:
+        mask = input
+    if mask.ndim == 3:
+        mask = mask[..., 0]
+    image_scale = 1.0
+    # resize trigger
+    if task == "Grounded Inpainting":
+        mask_cond = mask.sum() == 0
+        # size_cond = mask.shape != (512, 512)
+        if mask_cond and 'original_image' not in state:
+            image = Image.fromarray(image)
+            width, height = image.size
+            scale = 600 / min(width, height)
+            image = image.resize((int(width * scale), int(height * scale)))
+            state['original_image'] = np.array(image).copy()
+            image_scale = float(height / width)
+            return [None, new_image_trigger + 1, image_scale, state]
+        else:
+            original_image = state['original_image']
+            H, W = original_image.shape[:2]
+            image_scale = float(H / W)
+    mask = binarize(mask)
+    if mask.shape != (512, 512):
+        # assert False, "should not receive any non- 512x512 masks."
+        if 'original_image' in state and state['original_image'].shape[:2] == mask.shape:
+            mask = center_crop(mask, state['inpaint_hw'])
+            image = center_crop(state['original_image'], state['inpaint_hw'])
+        else:
+            mask = np.zeros((512, 512), dtype=np.uint8)
+    # mask = center_crop(mask)
+    mask = binarize(mask)
+    if type(mask) != np.ndarray:
+        mask = np.array(mask)
+    if mask.sum() == 0 and task != "Grounded Inpainting":
+        state = {}
+    if task != 'Grounded Inpainting':
+        image = None
+    else:
+        image = Image.fromarray(image)
+    if 'boxes' not in state:
+        state['boxes'] = []
+    if 'masks' not in state or len(state['masks']) == 0:
+        state['masks'] = []
+        last_mask = np.zeros_like(mask)
+    else:
+        last_mask = state['masks'][-1]
+    if type(mask) == np.ndarray and mask.size > 1:
+        diff_mask = mask - last_mask
+    else:
+        diff_mask = np.zeros([])
+    if diff_mask.sum() > 0:
+        x1x2 = np.where(diff_mask.max(0) != 0)[0]
+        y1y2 = np.where(diff_mask.max(1) != 0)[0]
+        y1, y2 = y1y2.min(), y1y2.max()
+        x1, x2 = x1x2.min(), x1x2.max()
+        if (x2 - x1 > 5) and (y2 - y1 > 5):
+            state['masks'].append(mask.copy())
+            state['boxes'].append((x1, y1, x2, y2))
+    grounding_texts = [x.strip() for x in grounding_texts.split(';')]
+    grounding_texts = [x for x in grounding_texts if len(x) > 0]
+    if len(grounding_texts) < len(state['boxes']):
+        grounding_texts += [f'Obj. {bid + 1}' for bid in range(len(grounding_texts), len(state['boxes']))]
+    print("state", state)
+    box_image = draw_box(state['boxes'], grounding_texts, image)
+    if box_image is not None and state.get('inpaint_hw', None):
+        inpaint_hw = state['inpaint_hw']
+        box_image_resize = np.array(box_image.resize((inpaint_hw, inpaint_hw)))
+        original_image = state['original_image'].copy()
+        box_image = sized_center_fill(original_image, box_image_resize, inpaint_hw, inpaint_hw)
+    print(box_image, new_image_trigger, image_scale, state)
+    return [box_image, new_image_trigger, image_scale, state]
+def clear(task, sketch_pad_trigger, batch_size, state, switch_task=False):
+    if task != 'Grounded Inpainting':
+        sketch_pad_trigger = sketch_pad_trigger + 1
+    blank_samples = batch_size % 2 if batch_size > 1 else 0
+    out_images = [gr.Image.update(value=None, visible=True) for i in range(batch_size)] \
+                 + [gr.Image.update(value=None, visible=True) for _ in range(blank_samples)] \
+                 + [gr.Image.update(value=None, visible=False) for _ in range(4 - batch_size - blank_samples)]
+    state = {}
+    return [None, sketch_pad_trigger, None, 1.0] + out_images + [state]
+css = """
+#img2img_image, #img2img_image > .fixed-height, #img2img_image > .fixed-height > div, #img2img_image > .fixed-height > div > img
+{
+    height: var(--height) !important;
+    max-height: var(--height) !important;
+    min-height: var(--height) !important;
+}
+#paper-info a {
+    color:#008AD7;
+    text-decoration: none;
+}
+#paper-info a:hover {
+    cursor: pointer;
+    text-decoration: none;
+}
+"""
+rescale_js = """
+function(x) {
+    const root = document.querySelector('gradio-app').shadowRoot || document.querySelector('gradio-app');
+    let image_scale = parseFloat(root.querySelector('#image_scale input').value) || 1.0;
+    const image_width = root.querySelector('#img2img_image').clientWidth;
+    const target_height = parseInt(image_width * image_scale);
+    document.body.style.setProperty('--height', `${target_height}px`);
+    root.querySelectorAll('button.justify-center.rounded')[0].style.display='none';
+    root.querySelectorAll('button.justify-center.rounded')[1].style.display='none';
+    return x;
+}
+"""
+with Blocks(
+        css=css,
+        analytics_enabled=False,
+        title="GLIGen demo",
+) as main:
+    description = """<p style="text-align: center; font-weight: bold;">
+        <span style="font-size: 28px">Layout Guidance</span>
+        <br>
+        <span style="font-size: 18px" id="paper-info">
+            [<a href="https://gligen.github.io" target="_blank">Project Page</a>]
+            [<a href="https://arxiv.org/abs/2301.07093" target="_blank">Paper</a>]
+            [<a href="https://github.com/gligen/GLIGEN" target="_blank">GitHub</a>]
+            [<a href="https://huggingface.co/spaces/gligen/demo_legacy" target="_blank">Mirror</a>]
+        </span>
+    </p>
+    """
+    gr.HTML(description)
+    with gr.Row():
+        with gr.Column(scale=4):
+            sketch_pad_trigger = gr.Number(value=0, visible=False)
+            sketch_pad_resize_trigger = gr.Number(value=0, visible=False)
+            init_white_trigger = gr.Number(value=0, visible=False)
+            image_scale = gr.Number(value=0, elem_id="image_scale", visible=False)
+            new_image_trigger = gr.Number(value=0, visible=False)
+            # task = gr.Radio(
+            #     choices=["Grounded Generation", 'Grounded Inpainting'],
+            #     type="value",
+            #     value="Grounded Generation",
+            #     label="Task",
+            # )
+            language_instruction = gr.Textbox(
+                label="Text Caption",
+            )
+            grounding_instruction = gr.Textbox(
+                label="Grounding instruction (Separated by semicolon)",
+            )
+            with gr.Row():
+                sketch_pad = ImageMask(label="Sketch Pad", elem_id="img2img_image")
+                out_imagebox = gr.Image(type="pil", label="Parsed Sketch Pad")
+            with gr.Row():
+                clear_btn = gr.Button(value='Clear')
+                gen_btn = gr.Button(value='Generate')
+            with gr.Accordion("Advanced Options", open=False):
+                with gr.Column():
+                    Loss_scale = gr.Slider(minimum=0, maximum=500, step=5, value=30,
+                                             label="Loss Scale Factor")
+                    guidance_scale = gr.Slider(minimum=0, maximum=50, step=0.5, value=7.5, label="Guidance Scale")
+                    batch_size = gr.Slider(minimum=1, maximum=4, step=1, value=2, label="Number of Samples")
+                    max_iter = gr.Slider(minimum=0, maximum=10, step=1, value=5, label="Max Iteration per Step")
+                    loss_threshold = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.2, label="Loss Threshold")
+                    max_step = gr.Slider(minimum=0, maximum=50, step=1, value=10, label="Max Step of Backward Guidance")
+                    # append_grounding = gr.Checkbox(value=True, label="Append grounding instructions to the caption")
+                    # use_actual_mask = gr.Checkbox(value=False, label="Use actual mask for inpainting", visible=False)
+                    with gr.Row():
+                        fix_seed = gr.Checkbox(value=True, label="Fixed seed")
+                        rand_seed = gr.Slider(minimum=0, maximum=1000, step=1, value=0, label="Seed")
+        with gr.Column(scale=4):
+            gr.HTML('<span style="font-size: 20px; font-weight: bold">Generated Images</span>')
+            with gr.Row():
+                out_gen_1 = gr.Image(type="pil", visible=True, show_label=False, label="Generated Image")
+                out_gen_2 = gr.Image(type="pil", visible=True, show_label=False)
+            with gr.Row():
+                out_gen_3 = gr.Image(type="pil", visible=False, show_label=False)
+                out_gen_4 = gr.Image(type="pil", visible=False, show_label=False)
+        state = gr.State({})
+        class Controller:
+            def __init__(self):
+                self.calls = 0
+                self.tracks = 0
+                self.resizes = 0
+                self.scales = 0
+            def init_white(self, init_white_trigger):
+                self.calls += 1
+                return np.ones((512, 512), dtype='uint8') * 255, 1.0, init_white_trigger + 1
+            def change_n_samples(self, n_samples):
+                blank_samples = n_samples % 2 if n_samples > 1 else 0
+                return [gr.Image.update(visible=True) for _ in range(n_samples + blank_samples)] \
+                    + [gr.Image.update(visible=False) for _ in range(4 - n_samples - blank_samples)]
+            def resize_centercrop(self, state):
+                self.resizes += 1
+                image = state['original_image'].copy()
+                inpaint_hw = int(0.9 * min(*image.shape[:2]))
+                state['inpaint_hw'] = inpaint_hw
+                image_cc = center_crop(image, inpaint_hw)
+                # print(f'resize triggered {self.resizes}', image.shape, '->', image_cc.shape)
+                return image_cc, state
+            def resize_masked(self, state):
+                self.resizes += 1
+                image = state['original_image'].copy()
+                inpaint_hw = int(0.9 * min(*image.shape[:2]))
+                state['inpaint_hw'] = inpaint_hw
+                image_mask = sized_center_mask(image, inpaint_hw, inpaint_hw)
+                state['masked_image'] = image_mask.copy()
+                # print(f'mask triggered {self.resizes}')
+                return image_mask, state
+            def switch_task_hide_cond(self, task):
+                cond = False
+                if task == "Grounded Generation":
+                    cond = True
+                return gr.Checkbox.update(visible=cond, value=False), gr.Image.update(value=None,
+                                                                                      visible=False), gr.Slider.update(
+                    visible=cond), gr.Checkbox.update(visible=(not cond), value=False)
+        controller = Controller()
+        main.load(
+            lambda x: x + 1,
+            inputs=sketch_pad_trigger,
+            outputs=sketch_pad_trigger,
+            queue=False)
+        sketch_pad.edit(
+            draw,
+            inputs=[sketch_pad, sketch_pad, grounding_instruction, sketch_pad_resize_trigger, state],
+            outputs=[out_imagebox, sketch_pad_resize_trigger, image_scale, state],
+            queue=False,
+        )
+        grounding_instruction.change(
+            draw,
+            inputs=[sketch_pad, sketch_pad, grounding_instruction, sketch_pad_resize_trigger, state],
+            outputs=[out_imagebox, sketch_pad_resize_trigger, image_scale, state],
+            queue=False,
+        )
+        clear_btn.click(
+            clear,
+            inputs=[sketch_pad_trigger, sketch_pad_trigger, batch_size, state],
+            outputs=[sketch_pad, sketch_pad_trigger, out_imagebox, image_scale, out_gen_1, out_gen_2, out_gen_3,
+                     out_gen_4, state],
+            queue=False)
+        # task.change(
+        #     partial(clear, switch_task=True),
+        #     inputs=[task, sketch_pad_trigger, batch_size, state],
+        #     outputs=[sketch_pad, sketch_pad_trigger, out_imagebox, image_scale, out_gen_1, out_gen_2, out_gen_3,
+        #              out_gen_4, state],
+        #     queue=False)
+        sketch_pad_trigger.change(
+            controller.init_white,
+            inputs=[init_white_trigger],
+            outputs=[sketch_pad, image_scale, init_white_trigger],
+            queue=False)
+        sketch_pad_resize_trigger.change(
+            controller.resize_masked,
+            inputs=[state],
+            outputs=[sketch_pad, state],
+            queue=False)
+        batch_size.change(
+            controller.change_n_samples,
+            inputs=[batch_size],
+            outputs=[out_gen_1, out_gen_2, out_gen_3, out_gen_4],
+            queue=False)
+        batch_size.change(
+            controller.change_n_samples,
+            inputs=[batch_size],
+            outputs=[out_gen_1, out_gen_2, out_gen_3, out_gen_4],
+            queue=False)
+        gen_btn.click(
+            generate,
+            inputs=[
+                language_instruction, language_instruction, grounding_instruction, sketch_pad,
+                loss_threshold, guidance_scale, batch_size,
+                fix_seed, rand_seed,
+                max_step,
+                Loss_scale, max_iter,
+                state,
+            ],
+            outputs=[out_gen_1, out_gen_2, out_gen_3, out_gen_4, state],
+            queue=True
+        )
+        sketch_pad_resize_trigger.change(
+            None,
+            None,
+            sketch_pad_resize_trigger,
+            _js=rescale_js,
+            queue=False)
+        init_white_trigger.change(
+            None,
+            None,
+            init_white_trigger,
+            _js=rescale_js,
+            queue=False)
+    with gr.Column():
+        gr.Examples(
+            examples=[
+                [
+                    "images/input.png",
+                    "A hello kitty toy is playing with a purple ball.",
+                    "hello kitty;ball",
+                    "images/hello_kitty_results.png"
+                ],
+            ],
+            inputs=[sketch_pad, language_instruction, grounding_instruction, out_gen_1],
+            outputs=None,
+            fn=None,
+            cache_examples=False,
+        )
+main.queue(concurrency_count=1, api_open=False)
+main.launch(share=False, show_api=False, show_error=True)

images/hello_kitty_results.png ADDED Viewed

images/input.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,18 @@

+torch==1.13.1
+torchvision==0.14.1
+xformers==0.0.16
+omegaconf==2.1.1
+albumentations==1.3.0
+opencv-python
+imageio==2.9.0
+imageio-ffmpeg==0.4.2
+pytorch-lightning==1.4.2
+test-tube>=0.7.5
+streamlit==1.17.0
+einops==0.3.0
+git+https://github.com/openai/CLIP.git
+protobuf~=3.20.1
+torchmetrics==0.6.0
+transformers==4.19.2
+kornia==0.6.0
+gradio==3.19.1