update new version
Changed files:
- app.py +14 -13
- requirements.txt +1 -1
- sam2edit.py +14 -68
- sam2edit_beauty.py +18 -66
- sam2edit_demo.py +140 -0
- sam2edit_handsome.py +17 -67
- sam2edit_lora.py +143 -62
- utils/stable_diffusion_controlnet_inpaint.py +9 -5
app.py
CHANGED
@@ -15,9 +15,7 @@ SHARED_UI_WARNING = f'''### [NOTE] Inference may be slow in this shared UI.
 You can duplicate and use it with a paid private GPU.
 <a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/jyseo/3DFuse?duplicate=true"><img style="margin-top:0;margin-bottom:0" src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-xl-dark.svg" alt="Duplicate Space"></a>
 '''
-
-#
-sam_generator = init_sam_model()
+sam_generator, mask_predictor = init_sam_model()
 blip_processor = init_blip_processor()
 blip_model = init_blip_model()
 
@@ -31,30 +29,33 @@ with gr.Blocks() as demo:
                controlmodel_name='LAION Pretrained(v0-4)-SD21',
                lora_model_path=None, use_blip=True, extra_inpaint=False,
                sam_generator=sam_generator,
+               mask_predictor=mask_predictor,
                blip_processor=blip_processor,
                blip_model=blip_model)
-            create_demo_edit_anything(model.process)
+            create_demo_edit_anything(model.process, model.process_image_click)
         with gr.TabItem(' 👩🦰Beauty Edit/Generation'):
             lora_model_path = hf_hub_download(
                 "mlida/Cute_girl_mix4", "cuteGirlMix4_v10.safetensors")
             model = EditAnythingLoraModel(base_model_path=os.path.join(sd_models_path, "chilloutmix_NiPrunedFp32Fix"),
                lora_model_path=lora_model_path, use_blip=True, extra_inpaint=True,
                sam_generator=sam_generator,
+               mask_predictor=mask_predictor,
                blip_processor=blip_processor,
                blip_model=blip_model,
                lora_weight=0.5,
                )
-            create_demo_beauty(model.process)
-
-
-
-
-
-
-
+            create_demo_beauty(model.process, model.process_image_click)
+        with gr.TabItem(' 👨🌾Handsome Edit/Generation'):
+            model = EditAnythingLoraModel(base_model_path=os.path.join(sd_models_path, "Realistic_Vision_V2.0"),
+               lora_model_path=None, use_blip=True, extra_inpaint=True,
+               sam_generator=sam_generator,
+               mask_predictor=mask_predictor,
+               blip_processor=blip_processor,
+               blip_model=blip_model)
+            create_demo_handsome(model.process, model.process_image_click)
         # with gr.TabItem('Generate Anything'):
         #     create_demo_generate_anything()
     with gr.Tabs():
         gr.Markdown(SHARED_UI_WARNING)
 
-demo.queue(api_open=False).launch()
+demo.queue(api_open=False).launch(server_name='0.0.0.0', share=False)
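The point of the app.py change is that one SAM backbone now serves every tab: init_sam_model() returns both the automatic mask generator and the interactive SamPredictor, and the same pair is passed to each EditAnythingLoraModel. A minimal sketch of that sharing pattern (the helper build_model and its keyword arguments are illustrative, not part of the commit):

from sam2edit_lora import EditAnythingLoraModel, init_sam_model

# Load the SAM ViT-H checkpoint once; init_sam_model() returns the pair unchanged
# when both objects are already supplied (see sam2edit_lora.py below).
sam_generator, mask_predictor = init_sam_model()

def build_model(base_model_path, **kwargs):
    # Every tab reuses the same generator/predictor, so SAM is only loaded once.
    return EditAnythingLoraModel(base_model_path=base_model_path,
                                 sam_generator=sam_generator,
                                 mask_predictor=mask_predictor,
                                 use_blip=True, **kwargs)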
requirements.txt
CHANGED
@@ -3,7 +3,7 @@ torch==1.13.1+cu117
 torchvision==0.14.1+cu117
 torchaudio==0.13.1
 numpy==1.23.1
-gradio==3.
+gradio==3.30.0
 gradio_client==0.1.4
 albumentations==1.3.0
 opencv-contrib-python==4.3.0.36
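The only dependency change is the exact gradio pin. A quick check (a sketch, not part of the commit) that the installed version has the newer event API the click-based editor relies on:

import gradio as gr

# The click-to-mask tab passes a gr.SelectData event into process_image_click,
# so a gradio build without SelectData would break the new UI.
assert hasattr(gr, "SelectData"), "gradio too old for the click-based editor"
print(gr.__version__)  # expected: 3.30.0 per requirements.txt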
sam2edit.py
CHANGED
@@ -1,82 +1,28 @@
 # Edit Anything trained with Stable Diffusion + ControlNet + SAM + BLIP2
+import os
 import gradio as gr
 from diffusers.utils import load_image
 from sam2edit_lora import EditAnythingLoraModel, config_dict
+from sam2edit_demo import create_demo_template
+from huggingface_hub import hf_hub_download, snapshot_download
 
 
-def create_demo(process):
+def create_demo(process, process_image_click=None):
 
-
-
-
-    WARNING_INFO = f'''### [NOTE] the model is collected from the Internet for demo only, please do not use it for commercial purposes.
-    We are not responsible for possible risks using this model.
+    examples = None
+    INFO = f'''
+    ## EditAnything https://github.com/sail-sg/EditAnything
     '''
-
-
-
-
-            "## EditAnything https://github.com/sail-sg/EditAnything ")
-        with gr.Row():
-            with gr.Column():
-                source_image = gr.Image(
-                    source='upload', label="Image (Upload an image and cover the region you want to edit with sketch)", type="numpy", tool="sketch")
-                enable_all_generate = gr.Checkbox(
-                    label='Auto generation on all region.', value=False)
-                prompt = gr.Textbox(
-                    label="Prompt (Text in the expected things of edited region)")
-                enable_auto_prompt = gr.Checkbox(
-                    label='Auto generate text prompt from input image with BLIP2: Warning: Enable this may makes your prompt not working.', value=False)
-                a_prompt = gr.Textbox(
-                    label="Added Prompt", value='best quality, extremely detailed')
-                n_prompt = gr.Textbox(label="Negative Prompt",
-                                      value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')
-                control_scale = gr.Slider(
-                    label="Mask Align strength (Large value means more strict alignment with SAM mask)", minimum=0, maximum=1, value=1, step=0.1)
-                run_button = gr.Button(label="Run")
-                num_samples = gr.Slider(
-                    label="Images", minimum=1, maximum=12, value=2, step=1)
-                seed = gr.Slider(label="Seed", minimum=-1,
-                                 maximum=2147483647, step=1, randomize=True)
-                enable_tile = gr.Checkbox(
-                    label='Tile refinement for high resolution generation.', value=True)
-                with gr.Accordion("Advanced options", open=False):
-                    mask_image = gr.Image(
-                        source='upload', label="(Optional) Upload a predefined mask of edit region if you do not want to write your prompt.", type="numpy", value=None)
-                    image_resolution = gr.Slider(
-                        label="Image Resolution", minimum=256, maximum=768, value=512, step=64)
-                    strength = gr.Slider(
-                        label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
-                    guess_mode = gr.Checkbox(
-                        label='Guess Mode', value=False)
-                    detect_resolution = gr.Slider(
-                        label="SAM Resolution", minimum=128, maximum=2048, value=1024, step=1)
-                    ddim_steps = gr.Slider(
-                        label="Steps", minimum=1, maximum=100, value=30, step=1)
-                    scale = gr.Slider(
-                        label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
-                    eta = gr.Number(label="eta (DDIM)", value=0.0)
-            with gr.Column():
-                result_gallery = gr.Gallery(
-                    label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
-                result_text = gr.Text(label='BLIP2+Human Prompt Text')
-        ips = [source_image, enable_all_generate, mask_image, control_scale, enable_auto_prompt, prompt, a_prompt, n_prompt, num_samples, image_resolution,
-               detect_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, enable_tile]
-        run_button.click(fn=process, inputs=ips, outputs=[
-                         result_gallery, result_text])
-        # with gr.Row():
-        #     ex = gr.Examples(examples=examples, fn=process,
-        #                      inputs=[a_prompt, n_prompt, scale],
-        #                      outputs=[result_gallery],
-        #                      cache_examples=False)
-        with gr.Row():
-            gr.Markdown(WARNING_INFO)
+    WARNING_INFO = None
+
+    demo = create_demo_template(process, process_image_click, examples=examples,
+                                INFO=INFO, WARNING_INFO=WARNING_INFO, enable_auto_prompt_default=True)
     return demo
 
 
 if __name__ == '__main__':
-    model = EditAnythingLoraModel(base_model_path="stabilityai/stable-diffusion-2
-        controlmodel_name='LAION Pretrained(v0-4)-SD21', extra_inpaint=
+    model = EditAnythingLoraModel(base_model_path="stabilityai/stable-diffusion-2",
+                                  controlmodel_name='LAION Pretrained(v0-4)-SD21', extra_inpaint=True,
                                   lora_model_path=None, use_blip=True)
-    demo = create_demo(model.process)
+    demo = create_demo(model.process, model.process_image_click)
     demo.queue().launch(server_name='0.0.0.0')
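After this refactor sam2edit.py only decides the prompts, examples and base model, and delegates all widget construction to create_demo_template. A hedged sketch of how the shared layout can be smoke-tested with stub callbacks before any diffusion or SAM weights are loaded (fake_process and fake_click are hypothetical stand-ins whose return shapes mirror what the template wires to its outputs):

from sam2edit_demo import create_demo_template

def fake_process(*args, **kwargs):
    # three galleries (refined, initial, reference) plus the prompt text box
    return [], [], [], "stub prompt"

def fake_click(original_image, point_prompt, clicked_points, image_resolution):
    # annotated preview image, updated click list, click mask
    return original_image, clicked_points, None

demo = create_demo_template(fake_process, fake_click,
                            INFO='## EditAnything layout smoke test')
demo.queue().launch(server_name='0.0.0.0')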
sam2edit_beauty.py
CHANGED
@@ -1,10 +1,13 @@
 # Edit Anything trained with Stable Diffusion + ControlNet + SAM + BLIP2
+import os
 import gradio as gr
 from diffusers.utils import load_image
 from sam2edit_lora import EditAnythingLoraModel, config_dict
+from sam2edit_demo import create_demo_template
+from huggingface_hub import hf_hub_download, snapshot_download
 
 
-def create_demo(process):
+def create_demo(process, process_image_click=None):
 
     examples = [
         ["dudou,1girl, beautiful face, solo, candle, brown hair, long hair, <lora:flowergirl:0.9>,ulzzang-6500-v1.1,(raw photo:1.2),((photorealistic:1.4))best quality ,masterpiece, illustration, an extremely delicate and beautiful, extremely detailed ,CG ,unity ,8k wallpaper, Amazing, finely detail, masterpiece,best quality,official art,extremely detailed CG unity 8k wallpaper,absurdres, incredibly absurdres, huge filesize, ultra-detailed, highres, extremely detailed,beautiful detailed girl, extremely detailed eyes and face, beautiful detailed eyes,cinematic lighting,1girl,see-through,looking at viewer,full body,full-body shot,outdoors,arms behind back,(chinese clothes) <lora:cuteGirlMix4_v10:1>",
@@ -16,77 +19,26 @@ def create_demo(process):
         ["mix4, whole body shot, ((8k, RAW photo, highest quality, masterpiece), High detail RAW color photo professional close-up photo, shy expression, cute, beautiful detailed girl, detailed fingers, extremely detailed eyes and face, beautiful detailed nose, beautiful detailed eyes, long eyelashes, light on face, looking at viewer, (closed mouth:1.2), 1girl, cute, young, mature face, (full body:1.3), ((small breasts)), realistic face, realistic body, beautiful detailed thigh,s, same eyes color, (realistic, photo realism:1. 37), (highest quality), (best shadow), (best illustration), ultra high resolution, physics-based rendering, cinematic lighting), solo, 1girl, highly detailed, in office, detailed office, open cardigan, ponytail contorted, beautiful eyes ,sitting in office,dating, business suit, cross-laced clothes, collared shirt, beautiful breast, small breast, Chinese dress, white pantyhose, natural breasts, pink and white hair, <lora:cuteGirlMix4_v10:1>",
          "paintings, sketches, (worst quality:2), (low quality:2), (normal quality:2), cloth, underwear, bra, low-res, normal quality, ((monochrome)), ((grayscale)), skin spots, acne, skin blemishes, age spots, glans, bad nipples, long nipples, bad vagina, extra fingers,fewer fingers,strange fingers,bad hand, ng_deepnegative_v1_75t, bad-picture-chill-75v", 7]
     ]
-
-
+    INFO = f'''
+    ## Generate Your Beauty powered by EditAnything https://github.com/sail-sg/EditAnything
+    This model is good at generating beautiful female.
+    '''
     WARNING_INFO = f'''### [NOTE] the model is collected from the Internet for demo only, please do not use it for commercial purposes.
     We are not responsible for possible risks using this model.
-
     Lora model from https://civitai.com/models/14171/cutegirlmix4 Thanks!
     '''
-
-
-    with gr.Row():
-        gr.Markdown(
-            "## Generate Your Beauty powered by EditAnything https://github.com/sail-sg/EditAnything ")
-    with gr.Row():
-        with gr.Column():
-            source_image = gr.Image(
-                source='upload', label="Image (Upload an image and cover the region you want to edit with sketch)", type="numpy", tool="sketch")
-            enable_all_generate = gr.Checkbox(
-                label='Auto generation on all region.', value=False)
-            prompt = gr.Textbox(
-                label="Prompt (Text in the expected things of edited region)")
-            enable_auto_prompt = gr.Checkbox(
-                label='Auto generate text prompt from input image with BLIP2: Warning: Enable this may makes your prompt not working.', value=False)
-            a_prompt = gr.Textbox(
-                label="Added Prompt", value='best quality, extremely detailed')
-            n_prompt = gr.Textbox(label="Negative Prompt",
-                                  value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')
-            control_scale = gr.Slider(
-                label="Mask Align strength (Large value means more strict alignment with SAM mask)", minimum=0, maximum=1, value=1, step=0.1)
-            run_button = gr.Button(label="Run")
-            num_samples = gr.Slider(
-                label="Images", minimum=1, maximum=12, value=2, step=1)
-            seed = gr.Slider(label="Seed", minimum=-1,
-                             maximum=2147483647, step=1, randomize=True)
-            enable_tile = gr.Checkbox(
-                label='Tile refinement for high resolution generation.', value=True)
-            with gr.Accordion("Advanced options", open=False):
-                mask_image = gr.Image(
-                    source='upload', label="(Optional) Upload a predefined mask of edit region if you do not want to write your prompt.", type="numpy", value=None)
-                image_resolution = gr.Slider(
-                    label="Image Resolution", minimum=256, maximum=768, value=512, step=64)
-                strength = gr.Slider(
-                    label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
-                guess_mode = gr.Checkbox(
-                    label='Guess Mode', value=False)
-                detect_resolution = gr.Slider(
-                    label="SAM Resolution", minimum=128, maximum=2048, value=1024, step=1)
-                ddim_steps = gr.Slider(
-                    label="Steps", minimum=1, maximum=100, value=30, step=1)
-                scale = gr.Slider(
-                    label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
-                eta = gr.Number(label="eta (DDIM)", value=0.0)
-        with gr.Column():
-            result_gallery = gr.Gallery(
-                label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
-            result_text = gr.Text(label='BLIP2+Human Prompt Text')
-    ips = [source_image, enable_all_generate, mask_image, control_scale, enable_auto_prompt, prompt, a_prompt, n_prompt, num_samples, image_resolution,
-           detect_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, enable_tile]
-    run_button.click(fn=process, inputs=ips, outputs=[
-                     result_gallery, result_text])
-    with gr.Row():
-        ex = gr.Examples(examples=examples, fn=process,
-                         inputs=[a_prompt, n_prompt, scale],
-                         outputs=[result_gallery],
-                         cache_examples=False)
-    with gr.Row():
-        gr.Markdown(WARNING_INFO)
+    demo = create_demo_template(process, process_image_click,
+                                examples=examples, INFO=INFO, WARNING_INFO=WARNING_INFO)
     return demo
 
 
 if __name__ == '__main__':
-
-
-
+    sd_models_path = snapshot_download("shgao/sdmodels")
+    lora_model_path = hf_hub_download(
+        "mlida/Cute_girl_mix4", "cuteGirlMix4_v10.safetensors")
+    model = EditAnythingLoraModel(base_model_path=os.path.join(sd_models_path, "chilloutmix_NiPrunedFp32Fix"),
                                   lora_model_path=lora_model_path, use_blip=True, extra_inpaint=True,
                                   lora_weight=0.5,
                                   )
+    demo = create_demo(model.process, model.process_image_click)
     demo.queue().launch(server_name='0.0.0.0')
sam2edit_demo.py
ADDED
@@ -0,0 +1,140 @@
+# Edit Anything trained with Stable Diffusion + ControlNet + SAM + BLIP2
+import gradio as gr
+
+def create_demo_template(process, process_image_click=None, examples=None,
+                         INFO='EditAnything https://github.com/sail-sg/EditAnything', WARNING_INFO=None,
+                         enable_auto_prompt_default=False,
+                         ):
+
+    print("The GUI is not fully tested yet. Please open an issue if you find bugs.")
+    block = gr.Blocks()
+    with block as demo:
+        clicked_points = gr.State([])
+        origin_image = gr.State(None)
+        click_mask = gr.State(None)
+        with gr.Row():
+            gr.Markdown(INFO)
+        with gr.Row().style(equal_height=False):
+            with gr.Column():
+                with gr.Tab("Click🖱"):
+                    source_image_click = gr.Image(
+                        type="pil", interactive=True,
+                        label="Image: Upload an image and click the region you want to edit.",
+                    )
+                    with gr.Column():
+                        with gr.Row():
+                            point_prompt = gr.Radio(
+                                choices=["Foreground Point", "Background Point"],
+                                value="Foreground Point",
+                                label="Point Label",
+                                interactive=True, show_label=False)
+                            clear_button_click = gr.Button(
+                                value="Clear Click Points", interactive=True)
+                            clear_button_image = gr.Button(
+                                value="Clear Image", interactive=True)
+                    with gr.Row():
+                        run_button_click = gr.Button(
+                            label="Run EditAnying", interactive=True)
+                with gr.Tab("Brush🖌️"):
+                    source_image_brush = gr.Image(
+                        source='upload',
+                        label="Image: Upload an image and cover the region you want to edit with sketch",
+                        type="numpy", tool="sketch"
+                    )
+                    run_button = gr.Button(label="Run EditAnying", interactive=True)
+                with gr.Column():
+                    enable_all_generate = gr.Checkbox(
+                        label='Auto generation on all region.', value=False)
+                    control_scale = gr.Slider(
+                        label="Mask Align strength", info="Large value -> strict alignment with SAM mask", minimum=0, maximum=1, value=1, step=0.1)
+                with gr.Column():
+                    enable_auto_prompt = gr.Checkbox(
+                        label='Auto generate text prompt from input image with BLIP2', info='Warning: Enable this may makes your prompt not working.', value=enable_auto_prompt_default)
+                    a_prompt = gr.Textbox(
+                        label="Positive Prompt", info='Text in the expected things of edited region', value='best quality, extremely detailed')
+                    n_prompt = gr.Textbox(label="Negative Prompt",
+                                          value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, NSFW')
+                with gr.Row():
+                    num_samples = gr.Slider(
+                        label="Images", minimum=1, maximum=12, value=2, step=1)
+                    seed = gr.Slider(label="Seed", minimum=-1,
+                                     maximum=2147483647, step=1, randomize=True)
+                with gr.Row():
+                    enable_tile = gr.Checkbox(
+                        label='Tile refinement for high resolution generation', info='Slow inference', value=True)
+                    refine_alignment_ratio = gr.Slider(
+                        label="Alignment Strength", info='Large value -> strict alignment with input image. Small value -> strong global consistency', minimum=0.0, maximum=1.0, value=0.95, step=0.05)
+
+                with gr.Accordion("Advanced options", open=False):
+                    mask_image = gr.Image(
+                        source='upload', label="Upload a predefined mask of edit region if you do not want to write your prompt.", info="(Optional:Switch to Brush mode when using this!) ", type="numpy", value=None)
+                    image_resolution = gr.Slider(
+                        label="Image Resolution", minimum=256, maximum=768, value=512, step=64)
+                    refine_image_resolution = gr.Slider(
+                        label="Image Resolution", minimum=256, maximum=8192, value=1024, step=64)
+                    strength = gr.Slider(
+                        label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
+                    guess_mode = gr.Checkbox(
+                        label='Guess Mode', value=False)
+                    detect_resolution = gr.Slider(
+                        label="SAM Resolution", minimum=128, maximum=2048, value=1024, step=1)
+                    ddim_steps = gr.Slider(
+                        label="Steps", minimum=1, maximum=100, value=30, step=1)
+                    scale = gr.Slider(
+                        label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
+                    eta = gr.Number(label="eta (DDIM)", value=0.0)
+            with gr.Column():
+                result_gallery_refine = gr.Gallery(
+                    label='Output High quality', show_label=True, elem_id="gallery").style(grid=2, preview=False)
+                result_gallery_init = gr.Gallery(
+                    label='Output Low quality', show_label=True, elem_id="gallery").style(grid=2, height='auto')
+                result_gallery_ref = gr.Gallery(
+                    label='Output Ref', show_label=False, elem_id="gallery").style(grid=2, height='auto')
+                result_text = gr.Text(label='BLIP2+Human Prompt Text')
+
+        ips = [source_image_brush, enable_all_generate, mask_image, control_scale, enable_auto_prompt, a_prompt, n_prompt, num_samples, image_resolution,
+               detect_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, enable_tile, refine_alignment_ratio, refine_image_resolution]
+        run_button.click(fn=process, inputs=ips, outputs=[
+                         result_gallery_refine, result_gallery_init, result_gallery_ref, result_text])
+
+        ip_click = [origin_image, enable_all_generate, click_mask, control_scale, enable_auto_prompt, a_prompt, n_prompt, num_samples, image_resolution,
+                    detect_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, enable_tile, refine_alignment_ratio, refine_image_resolution]
+
+        run_button_click.click(fn=process,
+                               inputs=ip_click,
+                               outputs=[result_gallery_refine, result_gallery_init, result_gallery_ref, result_text])
+
+        source_image_click.upload(
+            lambda image: image.copy() if image is not None else None,
+            inputs=[source_image_click],
+            outputs=[origin_image]
+        )
+        source_image_click.select(
+            process_image_click,
+            inputs=[origin_image, point_prompt,
+                    clicked_points, image_resolution],
+            outputs=[source_image_click, clicked_points, click_mask],
+            show_progress=True, queue=True
+        )
+        clear_button_click.click(
+            fn=lambda original_image: (original_image.copy(), [], None)
+            if original_image is not None else (None, [], None),
+            inputs=[origin_image],
+            outputs=[source_image_click, clicked_points, click_mask]
+        )
+        clear_button_image.click(
+            fn=lambda: (None, [], None, None, None),
+            inputs=[],
+            outputs=[source_image_click, clicked_points,
+                     click_mask, result_gallery_init, result_text]
+        )
+        if examples is not None:
+            with gr.Row():
+                ex = gr.Examples(examples=examples, fn=process,
+                                 inputs=[a_prompt, n_prompt, scale],
+                                 outputs=[result_gallery_init],
+                                 cache_examples=False)
+        if WARNING_INFO is not None:
+            with gr.Row():
+                gr.Markdown(WARNING_INFO)
+    return demo
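The new template implies a callback contract that every model class must satisfy. A sketch of that contract (not part of the commit; parameter names follow the ips/ip_click lists above): `process` receives the 19 inputs in the order listed in ips and returns the refined gallery, the initial gallery, the reference gallery and the prompt text, while `process_image_click` receives the stored original image, the point label, the accumulated clicks, the working resolution and the Gradio selection event, and returns the annotated preview, the updated click list and the click mask.

def process(source_image, enable_all_generate, mask_image, control_scale,
            enable_auto_prompt, a_prompt, n_prompt, num_samples, image_resolution,
            detect_resolution, ddim_steps, guess_mode, strength, scale, seed, eta,
            enable_tile, refine_alignment_ratio, refine_image_resolution):
    # must return: refined_images, initial_images, reference_images, prompt_text
    ...

def process_image_click(original_image, point_prompt, clicked_points,
                        image_resolution, evt):
    # must return: annotated_preview, clicked_points, click_mask
    ...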
sam2edit_handsome.py
CHANGED
@@ -1,87 +1,37 @@
 # Edit Anything trained with Stable Diffusion + ControlNet + SAM + BLIP2
+import os
 import gradio as gr
 from diffusers.utils import load_image
 from sam2edit_lora import EditAnythingLoraModel, config_dict
+from sam2edit_demo import create_demo_template
+from huggingface_hub import hf_hub_download, snapshot_download
 
 
-
-def create_demo(process):
+def create_demo(process, process_image_click=None):
 
     examples = [
-        ["1man, muscle,full body, vest, short straight hair, glasses, Gym, barbells, dumbbells, treadmills, boxing rings, squat racks, plates, dumbbell racks soft lighting, masterpiece, best quality, 8k uhd, film grain, Fujifilm XT3 photorealistic painting art by midjourney and greg rutkowski <lora:asianmale_v10:0.6>",
-
+        ["1man, muscle,full body, vest, short straight hair, glasses, Gym, barbells, dumbbells, treadmills, boxing rings, squat racks, plates, dumbbell racks soft lighting, masterpiece, best quality, 8k uhd, film grain, Fujifilm XT3 photorealistic painting art by midjourney and greg rutkowski <lora:asianmale_v10:0.6>",
+         "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck", 6],
+        ["1man, 25 years- old, full body, wearing long-sleeve white shirt and tie, muscular rand black suit, soft lighting, masterpiece, best quality, 8k uhd, dslr, film grain, Fujifilm XT3 photorealistic painting art by midjourney and greg rutkowski <lora:asianmale_v10:0.6> <lora:uncutPenisLora_v10:0.6>",
+         "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck", 6],
     ]
 
     print("The GUI is not fully tested yet. Please open an issue if you find bugs.")
+
+    INFO = f'''
+    ## Generate Your Handsome powered by EditAnything https://github.com/sail-sg/EditAnything
+    This model is good at generating handsome male.
+    '''
     WARNING_INFO = f'''### [NOTE] the model is collected from the Internet for demo only, please do not use it for commercial purposes.
     We are not responsible for possible risks using this model.
     Base model from https://huggingface.co/SG161222/Realistic_Vision_V2.0 Thanks!
     '''
-
-    with block as demo:
-        with gr.Row():
-            gr.Markdown(
-                "## Generate Your Handsome powered by EditAnything https://github.com/sail-sg/EditAnything ")
-        with gr.Row():
-            with gr.Column():
-                source_image = gr.Image(
-                    source='upload', label="Image (Upload an image and cover the region you want to edit with sketch)", type="numpy", tool="sketch")
-                enable_all_generate = gr.Checkbox(
-                    label='Auto generation on all region.', value=False)
-                prompt = gr.Textbox(
-                    label="Prompt (Text in the expected things of edited region)")
-                enable_auto_prompt = gr.Checkbox(
-                    label='Auto generate text prompt from input image with BLIP2: Warning: Enable this may makes your prompt not working.', value=False)
-                a_prompt = gr.Textbox(
-                    label="Added Prompt", value='best quality, extremely detailed')
-                n_prompt = gr.Textbox(label="Negative Prompt",
-                                      value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')
-                control_scale = gr.Slider(
-                    label="Mask Align strength (Large value means more strict alignment with SAM mask)", minimum=0, maximum=1, value=1, step=0.1)
-                run_button = gr.Button(label="Run")
-                num_samples = gr.Slider(
-                    label="Images", minimum=1, maximum=12, value=2, step=1)
-                seed = gr.Slider(label="Seed", minimum=-1,
-                                 maximum=2147483647, step=1, randomize=True)
-                enable_tile = gr.Checkbox(
-                    label='Tile refinement for high resolution generation.', value=True)
-                with gr.Accordion("Advanced options", open=False):
-                    mask_image = gr.Image(
-                        source='upload', label="(Optional) Upload a predefined mask of edit region if you do not want to write your prompt.", type="numpy", value=None)
-                    image_resolution = gr.Slider(
-                        label="Image Resolution", minimum=256, maximum=768, value=512, step=64)
-                    strength = gr.Slider(
-                        label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
-                    guess_mode = gr.Checkbox(
-                        label='Guess Mode', value=False)
-                    detect_resolution = gr.Slider(
-                        label="SAM Resolution", minimum=128, maximum=2048, value=1024, step=1)
-                    ddim_steps = gr.Slider(
-                        label="Steps", minimum=1, maximum=100, value=30, step=1)
-                    scale = gr.Slider(
-                        label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
-                    eta = gr.Number(label="eta (DDIM)", value=0.0)
-            with gr.Column():
-                result_gallery = gr.Gallery(
-                    label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
-                result_text = gr.Text(label='BLIP2+Human Prompt Text')
-        ips = [source_image, enable_all_generate, mask_image, control_scale, enable_auto_prompt, prompt, a_prompt, n_prompt, num_samples, image_resolution,
-               detect_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, enable_tile]
-        run_button.click(fn=process, inputs=ips, outputs=[
-                         result_gallery, result_text])
-        with gr.Row():
-            ex = gr.Examples(examples=examples, fn=process,
-                             inputs=[a_prompt, n_prompt, scale],
-                             outputs=[result_gallery],
-                             cache_examples=False)
-        with gr.Row():
-            gr.Markdown(WARNING_INFO)
+    demo = create_demo_template(process, process_image_click, examples=examples, INFO=INFO, WARNING_INFO=WARNING_INFO)
     return demo
 
 
-
 if __name__ == '__main__':
-    model = EditAnythingLoraModel(base_model_path=
-
-    demo = create_demo(model.process)
+    model = EditAnythingLoraModel(base_model_path='Realistic_Vision_V2.0',
+                                  lora_model_path=None, use_blip=True)
+    demo = create_demo(model.process, model.process_image_click)
     demo.queue().launch(server_name='0.0.0.0')
sam2edit_lora.py
CHANGED
@@ -14,7 +14,7 @@ import random
 import os
 import requests
 from io import BytesIO
-from annotator.util import resize_image, HWC3
+from annotator.util import resize_image, HWC3, resize_points
 
 import torch
 from safetensors.torch import load_file
@@ -22,7 +22,6 @@ from collections import defaultdict
 from diffusers import StableDiffusionControlNetPipeline
 from diffusers import ControlNetModel, UniPCMultistepScheduler
 from utils.stable_diffusion_controlnet_inpaint import StableDiffusionControlNetInpaintPipeline
-# from utils.tmp import StableDiffusionControlNetInpaintPipeline
 # need the latest transformers
 # pip install git+https://github.com/huggingface/transformers.git
 from transformers import AutoProcessor, Blip2ForConditionalGeneration
@@ -32,13 +31,13 @@ import PIL.Image
 # Segment-Anything init.
 # pip install git+https://github.com/facebookresearch/segment-anything.git
 try:
-    from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
+    from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
 except ImportError:
     print('segment_anything not installed')
     result = subprocess.run(
         ['pip', 'install', 'git+https://github.com/facebookresearch/segment-anything.git'], check=True)
     print(f'Install segment_anything {result}')
-    from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
+    from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
 if not os.path.exists('./models/sam_vit_h_4b8939.pth'):
     result = subprocess.run(
         ['wget', 'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth', '-P', 'models'], check=True)
@@ -52,13 +51,18 @@ config_dict = OrderedDict([
 ])
 
 
-def init_sam_model():
+def init_sam_model(sam_generator=None, mask_predictor=None):
+    if sam_generator is not None and mask_predictor is not None:
+        return sam_generator, mask_predictor
     sam_checkpoint = "models/sam_vit_h_4b8939.pth"
     model_type = "default"
     sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
     sam.to(device=device)
-    sam_generator = SamAutomaticMaskGenerator(
-
+    sam_generator = SamAutomaticMaskGenerator(
+        sam) if sam_generator is None else sam_generator
+    mask_predictor = SamPredictor(
+        sam) if mask_predictor is None else mask_predictor
+    return sam_generator, mask_predictor
 
 
 def init_blip_processor():
@@ -112,7 +116,6 @@ def get_pipeline_embeds(pipeline, prompt, negative_prompt, device):
     return torch.cat(concat_embeds, dim=1), torch.cat(neg_embeds, dim=1)
 
 
-
 def load_lora_weights(pipeline, checkpoint_path, multiplier, device, dtype):
     LORA_PREFIX_UNET = "lora_unet"
     LORA_PREFIX_TEXT_ENCODER = "lora_te"
@@ -241,10 +244,12 @@ def make_inpaint_condition(image, image_mask):
     image = torch.from_numpy(image)
     return image
 
+
 def obtain_generation_model(base_model_path, lora_model_path, controlnet_path, generation_only=False, extra_inpaint=True, lora_weight=1.0):
     controlnet = []
-    controlnet.append(ControlNetModel.from_pretrained(
-
+    controlnet.append(ControlNetModel.from_pretrained(
+        controlnet_path, torch_dtype=torch.float16))  # sam control
+    if (not generation_only) and extra_inpaint:  # inpainting control
         print("Warning: ControlNet based inpainting model only support SD1.5 for now.")
         controlnet.append(
             ControlNetModel.from_pretrained(
@@ -271,17 +276,18 @@ def obtain_generation_model(base_model_path, lora_model_path, controlnet_path, g
     pipe.enable_model_cpu_offload()
     return pipe
 
+
 def obtain_tile_model(base_model_path, lora_model_path, lora_weight=1.0):
     controlnet = ControlNetModel.from_pretrained(
-
+        'lllyasviel/control_v11f1e_sd15_tile', torch_dtype=torch.float16)  # tile controlnet
-    if base_model_path=='runwayml/stable-diffusion-v1-5' or base_model_path=='stabilityai/stable-diffusion-2-inpainting':
+    if base_model_path == 'runwayml/stable-diffusion-v1-5' or base_model_path == 'stabilityai/stable-diffusion-2-inpainting':
         print("base_model_path", base_model_path)
-        pipe =
+        pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16, safety_checker=None
        )
    else:
-        pipe =
-
+        pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
+            base_model_path, controlnet=controlnet, torch_dtype=torch.float16, safety_checker=None
        )
    if lora_model_path is not None:
        pipe = load_lora_weights(
@@ -296,7 +302,6 @@ def obtain_tile_model(base_model_path, lora_model_path, lora_weight=1.0):
     return pipe
 
 
-
 def show_anns(anns):
     if len(anns) == 0:
         return
@@ -331,9 +336,11 @@ class EditAnythingLoraModel:
                  blip_model=None,
                  sam_generator=None,
                  controlmodel_name='LAION Pretrained(v0-4)-SD15',
-
+                 # used when the base model is not an inpainting model.
+                 extra_inpaint=True,
                  tile_model=None,
                  lora_weight=1.0,
+                 mask_predictor=None
                  ):
        self.device = device
        self.use_blip = use_blip
@@ -348,11 +355,8 @@ class EditAnythingLoraModel:
            base_model_path, lora_model_path, self.default_controlnet_path, generation_only=False, extra_inpaint=extra_inpaint, lora_weight=lora_weight)
 
        # Segment-Anything init.
-
-
-        else:
-            self.sam_generator = init_sam_model()
-
+        self.sam_generator, self.mask_predictor = init_sam_model(
+            sam_generator, mask_predictor)
        # BLIP2 init.
        if use_blip:
            if blip_processor is not None:
@@ -369,7 +373,8 @@ class EditAnythingLoraModel:
        if tile_model is not None:
            self.tile_pipe = tile_model
        else:
-            self.tile_pipe = obtain_tile_model(
+            self.tile_pipe = obtain_tile_model(
+                base_model_path, lora_model_path, lora_weight=lora_weight)
 
    def get_blip2_text(self, image):
        inputs = self.blip_processor(image, return_tensors="pt").to(
@@ -384,19 +389,92 @@ class EditAnythingLoraModel:
        full_img, res = show_anns(masks)
        return full_img, res
 
+    def get_click_mask(self, image, clicked_points):
+        self.mask_predictor.set_image(image)
+        # Separate the points and labels
+        points, labels = zip(*[(point[:2], point[2])
+                               for point in clicked_points])
+
+        # Convert the points and labels to numpy arrays
+        input_point = np.array(points)
+        input_label = np.array(labels)
+
+        masks, _, _ = self.mask_predictor.predict(
+            point_coords=input_point,
+            point_labels=input_label,
+            multimask_output=False,
+        )
+
+        return masks
+
    @torch.inference_mode()
-    def
-
-
-
+    def process_image_click(self, original_image: gr.Image,
+                            point_prompt: gr.Radio,
+                            clicked_points: gr.State,
+                            image_resolution,
+                            evt: gr.SelectData):
+        # Get the clicked coordinates
+        clicked_coords = evt.index
+        x, y = clicked_coords
+        label = point_prompt
+        lab = 1 if label == "Foreground Point" else 0
+        clicked_points.append((x, y, lab))
+
+        input_image = np.array(original_image, dtype=np.uint8)
+        H, W, C = input_image.shape
+        input_image = HWC3(input_image)
+        img = resize_image(input_image, image_resolution)
+
+        # Update the clicked_points
+        resized_points = resize_points(clicked_points,
+                                       input_image.shape,
+                                       image_resolution)
+        mask_click_np = self.get_click_mask(img, resized_points)
+
+        # Convert mask_click_np to HWC format
+        mask_click_np = np.transpose(mask_click_np, (1, 2, 0)) * 255.0
+
+        mask_image = HWC3(mask_click_np.astype(np.uint8))
+        mask_image = cv2.resize(
+            mask_image, (W, H), interpolation=cv2.INTER_LINEAR)
+        # mask_image = Image.fromarray(mask_image_tmp)
+
+        # Draw circles for all clicked points
+        edited_image = input_image
+        for x, y, lab in clicked_points:
+            # Set the circle color based on the label
+            color = (255, 0, 0) if lab == 1 else (0, 0, 255)
+
+            # Draw the circle
+            edited_image = cv2.circle(edited_image, (x, y), 20, color, -1)
+
+        # Set the opacity for the mask_image and edited_image
+        opacity_mask = 0.75
+        opacity_edited = 1.0
+
+        # Combine the edited_image and the mask_image using cv2.addWeighted()
+        overlay_image = cv2.addWeighted(
+            edited_image, opacity_edited,
+            (mask_image * np.array([0/255, 255/255, 0/255])).astype(np.uint8),
+            opacity_mask, 0
+        )
+
+        return Image.fromarray(overlay_image), clicked_points, Image.fromarray(mask_image)
+
+    @torch.inference_mode()
+    def process(self, source_image, enable_all_generate, mask_image,
+                control_scale,
+                enable_auto_prompt, a_prompt, n_prompt,
+                num_samples, image_resolution, detect_resolution,
                ddim_steps, guess_mode, strength, scale, seed, eta,
-        enable_tile=True, condition_model=None):
+                enable_tile=True, refine_alignment_ratio=None, refine_image_resolution=None, condition_model=None):
 
        if condition_model is None:
            this_controlnet_path = self.default_controlnet_path
        else:
            this_controlnet_path = config_dict[condition_model]
-        input_image = source_image["image"]
+        input_image = source_image["image"] if isinstance(
+            source_image, dict) else np.array(source_image, dtype=np.uint8)
        if mask_image is None:
            if enable_all_generate != self.defalut_enable_all_generate:
                self.pipe = obtain_generation_model(
@@ -410,6 +488,8 @@ class EditAnythingLoraModel:
                    (input_image.shape[0], input_image.shape[1], 3))*255
            else:
                mask_image = source_image["mask"]
+        else:
+            mask_image = np.array(mask_image, dtype=np.uint8)
        if self.default_controlnet_path != this_controlnet_path:
            print("To Use:", this_controlnet_path,
                  "Current:", self.default_controlnet_path)
@@ -424,10 +504,10 @@ class EditAnythingLoraModel:
            print("Generating text:")
            blip2_prompt = self.get_blip2_text(input_image)
            print("Generated text:", blip2_prompt)
-            if len(
-
+            if len(a_prompt) > 0:
+                a_prompt = blip2_prompt + ',' + a_prompt
            else:
-
+                a_prompt = blip2_prompt
 
        input_image = HWC3(input_image)
 
@@ -448,23 +528,23 @@ class EditAnythingLoraModel:
        control = torch.stack([control for _ in range(num_samples)], dim=0)
        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
 
-
+        mask_imag_ori = HWC3(mask_image.astype(np.uint8))
        mask_image_tmp = cv2.resize(
-
+            mask_imag_ori, (W, H), interpolation=cv2.INTER_LINEAR)
        mask_image = Image.fromarray(mask_image_tmp)
 
        if seed == -1:
            seed = random.randint(0, 65535)
        seed_everything(seed)
        generator = torch.manual_seed(seed)
-        postive_prompt =
+        postive_prompt = a_prompt
        negative_prompt = n_prompt
        prompt_embeds, negative_prompt_embeds = get_pipeline_embeds(
            self.pipe, postive_prompt, negative_prompt, "cuda")
        prompt_embeds = torch.cat([prompt_embeds] * num_samples, dim=0)
        negative_prompt_embeds = torch.cat(
            [negative_prompt_embeds] * num_samples, dim=0)
-        if enable_all_generate and self.extra_inpaint:
+        if enable_all_generate and not self.extra_inpaint:
            self.pipe.safety_checker = lambda images, clip_input: (
                images, False)
            x_samples = self.pipe(
@@ -485,7 +565,8 @@ class EditAnythingLoraModel:
            if self.extra_inpaint:
                inpaint_image = make_inpaint_condition(img, mask_image_tmp)
                print(inpaint_image.shape)
-                multi_condition_image.append(
+                multi_condition_image.append(
+                    inpaint_image.type(torch.float16))
                multi_condition_scale.append(1.0)
            x_samples = self.pipe(
                image=img,
@@ -501,33 +582,33 @@ class EditAnythingLoraModel:
            ).images
        results = [x_samples[i] for i in range(num_samples)]
 
-
-
-        # for each in img_tile:
-        #     print("tile",each.size)
+        results_tile = []
+        if enable_tile:
            prompt_embeds, negative_prompt_embeds = get_pipeline_embeds(
                self.tile_pipe, postive_prompt, negative_prompt, "cuda")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            for i in range(num_samples):
+                img_tile = PIL.Image.fromarray(resize_image(
+                    np.array(x_samples[i]), refine_image_resolution))
+                if i == 0:
+                    mask_image_tile = cv2.resize(
+                        mask_imag_ori, (img_tile.size[0], img_tile.size[1]), interpolation=cv2.INTER_LINEAR)
+                    mask_image_tile = Image.fromarray(mask_image_tile)
+                x_samples_tile = self.tile_pipe(
+                    image=img_tile,
+                    mask_image=mask_image_tile,
+                    prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds,
+                    num_images_per_prompt=1,
+                    num_inference_steps=ddim_steps,
+                    generator=generator,
+                    controlnet_conditioning_image=img_tile,
+                    height=img_tile.size[1],
+                    width=img_tile.size[0],
+                    controlnet_conditioning_scale=1.0,
+                    alignment_ratio=refine_alignment_ratio,
+                ).images
+                results_tile += x_samples_tile
+
+        return results_tile, results, [full_segmask, mask_image], postive_prompt
 
 def download_image(url):
    response = requests.get(url)
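The new get_click_mask path converts UI clicks into a SAM mask with the interactive SamPredictor rather than the automatic mask generator. A standalone sketch of that prompted-segmentation step, assuming the checkpoint path used in the file above (set_image and predict with point_coords/point_labels are segment-anything's own API):

import numpy as np
from segment_anything import sam_model_registry, SamPredictor

sam = sam_model_registry["default"](checkpoint="models/sam_vit_h_4b8939.pth")
predictor = SamPredictor(sam.to("cuda"))

image = np.zeros((512, 512, 3), dtype=np.uint8)   # stand-in for the uploaded image
predictor.set_image(image)
masks, scores, _ = predictor.predict(
    point_coords=np.array([[256, 256]]),           # (x, y) click positions
    point_labels=np.array([1]),                    # 1 = foreground, 0 = background
    multimask_output=False,
)
print(masks.shape)                                  # (1, H, W) boolean mask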
utils/stable_diffusion_controlnet_inpaint.py
CHANGED
@@ -835,6 +835,7 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, LoraLoaderMixi
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 1.0,
+        alignment_ratio = None,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
@@ -1115,12 +1116,15 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, LoraLoaderMixi
                     progress_bar.update()
                 if callback is not None and i % callback_steps == 0:
                     callback(i, t, latents)
-                # if self.unet.config.in_channels==4:
-                #     # masking for non-inpainting models
-                #     init_latents_proper = self.scheduler.add_noise(init_masked_image_latents, noise, t)
-                #     latents = (init_latents_proper * mask_image) + (latents * (1 - mask_image))
 
-
+                if self.unet.config.in_channels==4 and alignment_ratio is not None:
+                    if i < len(timesteps) * alignment_ratio:
+                        # print(i, len(timesteps))
+                        # masking for non-inpainting models
+                        init_latents_proper = self.scheduler.add_noise(init_masked_image_latents, noise, t)
+                        latents = (init_latents_proper * mask_image) + (latents * (1 - mask_image))
+
+        if self.unet.config.in_channels==4 and (alignment_ratio==1.0 or alignment_ratio is None):
            # fill the unmasked part with original image
            latents = (init_masked_image_latents * mask_image) + (latents * (1 - mask_image))
 
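The alignment_ratio added to the inpainting pipeline re-imposes the known (unmasked) region on the latents only for the first fraction of denoising steps, leaving the final steps free to harmonize the whole image. A small sketch of that schedule (illustrative, not part of the commit); with the UI default of 0.95 and 30 steps, the constraint is applied on 29 of the 30 steps:

def keep_original_region(i, num_steps, alignment_ratio):
    # Mirror of the condition added above: blend the original latents back in
    # while i is within the first alignment_ratio fraction of the schedule.
    return alignment_ratio is not None and i < num_steps * alignment_ratio

steps = 30
applied = [keep_original_region(i, steps, 0.95) for i in range(steps)]
print(applied.count(True), "of", steps, "steps re-impose the unmasked region")  # 29 of 30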