Spaces:

vittore
/

pattern-into-image

Running

App Files Files Community

vittore committed on May 16

Commit

7b64ad2

•

1 Parent(s): 9952335

Add a beautiful description

Browse files

Files changed (2) hide show

app.py +52 -58
safety_checker.py +137 -0

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import gradio as gr
 from gradio import processing_utils, utils
 from PIL import Image
 import random
 from diffusers import (
     DiffusionPipeline,
     AutoencoderKL,
@@ -12,39 +13,60 @@ from diffusers import (
     StableDiffusionLatentUpscalePipeline,
     StableDiffusionImg2ImgPipeline,
     StableDiffusionControlNetImg2ImgPipeline,
-    DPMSolverMultistepScheduler,  # <-- Added import
-    EulerDiscreteScheduler  # <-- Added import
 )
 import tempfile
 import time
 from share_btn import community_icon_html, loading_icon_html, share_js
 import user_history
 from illusion_style import css
 BASE_MODEL = "SG161222/Realistic_Vision_V5.1_noVAE"
-device='cpu'
 # Initialize both pipelines
 vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
-#init_pipe = DiffusionPipeline.from_pretrained("SG161222/Realistic_Vision_V5.1_noVAE", torch_dtype=torch.float16)
-controlnet = ControlNetModel.from_pretrained("monster-labs/control_v1p_sd15_qrcode_monster", torch_dtype=torch.float16)#, torch_dtype=torch.float16)
 main_pipe = StableDiffusionControlNetPipeline.from_pretrained(
     BASE_MODEL,
     controlnet=controlnet,
     vae=vae,
-    safety_checker=None,
     torch_dtype=torch.float16,
-).to(device)
 #main_pipe.unet = torch.compile(main_pipe.unet, mode="reduce-overhead", fullgraph=True)
 #main_pipe.unet.to(memory_format=torch.channels_last)
 #main_pipe.unet = torch.compile(main_pipe.unet, mode="reduce-overhead", fullgraph=True)
 #model_id = "stabilityai/sd-x2-latent-upscaler"
 image_pipe = StableDiffusionControlNetImg2ImgPipeline(**main_pipe.components)
 #image_pipe.unet = torch.compile(image_pipe.unet, mode="reduce-overhead", fullgraph=True)
 #upscaler = StableDiffusionLatentUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16)
 #upscaler.to("cuda")
@@ -104,12 +126,13 @@ def check_inputs(prompt: str, control_image: Image.Image):
         raise gr.Error("Prompt is required")
 def convert_to_pil(base64_image):
-    pil_image = processing_utils.decode_base64_to_image(base64_image)
     return pil_image
 def convert_to_base64(pil_image):
-    base64_image = processing_utils.encode_pil_to_base64(pil_image)
-    return base64_image
 # Inference function
 @spaces.GPU
@@ -141,7 +164,7 @@ def inference(
     main_pipe.scheduler = SAMPLER_MAP[sampler](main_pipe.scheduler.config)
     my_seed = random.randint(0, 2**32 - 1) if seed == -1 else seed
-    generator = torch.Generator(device=device).manual_seed(my_seed)
     out = main_pipe(
         prompt=prompt,
@@ -197,15 +220,17 @@ def inference(
 with gr.Blocks() as app:
     gr.Markdown(
         '''
-        <center><h1>Illusion Diffusion HQ 🌀</h1></span>
-        <span font-size:16px;">Generate stunning high quality illusion artwork with Stable Diffusion</span>
-        </center>
-        A space by AP [Follow me on Twitter](https://twitter.com/angrypenguinPNG) with big contributions from [multimodalart](https://twitter.com/multimodalart)
-        This project works by using [Monster Labs QR Control Net](https://huggingface.co/monster-labs/control_v1p_sd15_qrcode_monster).
-        Given a prompt and your pattern, we use a QR code conditioned controlnet to create a stunning illusion! Credit to: [MrUgleh](https://twitter.com/MrUgleh) for discovering the workflow :)
         '''
     )
     state_img_input = gr.State()
     state_img_output = gr.State()
     with gr.Row():
@@ -235,53 +260,22 @@ with gr.Blocks() as app:
         check_inputs,
         inputs=[prompt, control_image],
         queue=False
-    ).success(
-        convert_to_pil,
-        inputs=[control_image],
-        outputs=[state_img_input],
-        queue=False,
-        preprocess=False,
     ).success(
         inference,
-        inputs=[state_img_input, prompt, negative_prompt, guidance_scale, controlnet_conditioning_scale, control_start, control_end, strength, seed, sampler],
-        outputs=[state_img_output, result_image, share_group, used_seed]
-    ).success(
-        convert_to_base64,
-        inputs=[state_img_output],
-        outputs=[result_image],
-        queue=False,
-        postprocess=False
-    )
     run_btn.click(
         check_inputs,
         inputs=[prompt, control_image],
         queue=False
-    ).success(
-        convert_to_pil,
-        inputs=[control_image],
-        outputs=[state_img_input],
-        queue=False,
-        preprocess=False,
     ).success(
         inference,
-        inputs=[state_img_input, prompt, negative_prompt, guidance_scale, controlnet_conditioning_scale, control_start, control_end, strength, seed, sampler],
-        outputs=[state_img_output, result_image, share_group, used_seed]
-    ).success(
-        convert_to_base64,
-        inputs=[state_img_output],
-        outputs=[result_image],
-        queue=False,
-        postprocess=False
-    )
     share_button.click(None, [], [], js=share_js)
-def greet(name):
-    return "Hello " + name + "!!"
-#demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-#demo.launch()
 with gr.Blocks(css=css) as app_with_history:
     with gr.Tab("Demo"):
         app.render()

 from gradio import processing_utils, utils
 from PIL import Image
 import random
 from diffusers import (
     DiffusionPipeline,
     AutoencoderKL,
     StableDiffusionLatentUpscalePipeline,
     StableDiffusionImg2ImgPipeline,
     StableDiffusionControlNetImg2ImgPipeline,
+    DPMSolverMultistepScheduler,
+    EulerDiscreteScheduler
 )
 import tempfile
 import time
 from share_btn import community_icon_html, loading_icon_html, share_js
 import user_history
 from illusion_style import css
+import os
+from transformers import CLIPImageProcessor
+from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 BASE_MODEL = "SG161222/Realistic_Vision_V5.1_noVAE"
 # Initialize both pipelines
 vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
+controlnet = ControlNetModel.from_pretrained("monster-labs/control_v1p_sd15_qrcode_monster", torch_dtype=torch.float16)
+# Initialize the safety checker conditionally
+SAFETY_CHECKER_ENABLED = os.environ.get("SAFETY_CHECKER", "0") == "1"
+safety_checker = None
+feature_extractor = None
+if SAFETY_CHECKER_ENABLED:
+    safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker").to("cuda")
+    feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
 main_pipe = StableDiffusionControlNetPipeline.from_pretrained(
     BASE_MODEL,
     controlnet=controlnet,
     vae=vae,
+    safety_checker=safety_checker,
+    feature_extractor=feature_extractor,
     torch_dtype=torch.float16,
+).to("cuda")
+# Function to check NSFW images
+#def check_nsfw_images(images: list[Image.Image]) -> tuple[list[Image.Image], list[bool]]:
+#    if SAFETY_CHECKER_ENABLED:
+#        safety_checker_input = feature_extractor(images, return_tensors="pt").to("cuda")
+#        has_nsfw_concepts = safety_checker(
+#            images=[images],
+#            clip_input=safety_checker_input.pixel_values.to("cuda")
+#        )
+#        return images, has_nsfw_concepts
+#    else:
+#        return images, [False] * len(images)
 #main_pipe.unet = torch.compile(main_pipe.unet, mode="reduce-overhead", fullgraph=True)
 #main_pipe.unet.to(memory_format=torch.channels_last)
 #main_pipe.unet = torch.compile(main_pipe.unet, mode="reduce-overhead", fullgraph=True)
 #model_id = "stabilityai/sd-x2-latent-upscaler"
 image_pipe = StableDiffusionControlNetImg2ImgPipeline(**main_pipe.components)
 #image_pipe.unet = torch.compile(image_pipe.unet, mode="reduce-overhead", fullgraph=True)
 #upscaler = StableDiffusionLatentUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16)
 #upscaler.to("cuda")
         raise gr.Error("Prompt is required")
 def convert_to_pil(base64_image):
+    pil_image = Image.open(base64_image)
     return pil_image
 def convert_to_base64(pil_image):
+    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file:
+        pil_image.save(temp_file.name)
+    return temp_file.name
 # Inference function
 @spaces.GPU
     main_pipe.scheduler = SAMPLER_MAP[sampler](main_pipe.scheduler.config)
     my_seed = random.randint(0, 2**32 - 1) if seed == -1 else seed
+    generator = torch.Generator(device="cuda").manual_seed(my_seed)
     out = main_pipe(
         prompt=prompt,
 with gr.Blocks() as app:
     gr.Markdown(
         '''
+        <div style="text-align: center;">
+            <h1>Illusion Diffusion HQ 🌀</h1>
+            <p style="font-size:16px;">Generate stunning high quality illusion artwork with Stable Diffusion</p>
+            <p>Illusion Diffusion is back up with a safety checker! Because I have been asked, if you would like to support me, consider using <a href="https://deforum.studio">deforum.studio</a></p>
+            <p>A space by AP <a href="https://twitter.com/angrypenguinPNG">Follow me on Twitter</a> with big contributions from <a href="https://twitter.com/multimodalart">multimodalart</a></p>
+            <p>This project works by using <a href="https://huggingface.co/monster-labs/control_v1p_sd15_qrcode_monster">Monster Labs QR Control Net</a>. Given a prompt and your pattern, we use a QR code conditioned controlnet to create a stunning illusion! Credit to: <a href="https://twitter.com/MrUgleh">MrUgleh</a> for discovering the workflow :)</p>
+        </div>
         '''
     )
     state_img_input = gr.State()
     state_img_output = gr.State()
     with gr.Row():
         check_inputs,
         inputs=[prompt, control_image],
         queue=False
     ).success(
         inference,
+        inputs=[control_image, prompt, negative_prompt, guidance_scale, controlnet_conditioning_scale, control_start, control_end, strength, seed, sampler],
+        outputs=[result_image, result_image, share_group, used_seed])
     run_btn.click(
         check_inputs,
         inputs=[prompt, control_image],
         queue=False
     ).success(
         inference,
+        inputs=[control_image, prompt, negative_prompt, guidance_scale, controlnet_conditioning_scale, control_start, control_end, strength, seed, sampler],
+        outputs=[result_image, result_image, share_group, used_seed])
     share_button.click(None, [], [], js=share_js)
 with gr.Blocks(css=css) as app_with_history:
     with gr.Tab("Demo"):
         app.render()

safety_checker.py ADDED Viewed

	@@ -0,0 +1,137 @@

+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import torch
+import torch.nn as nn
+from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel
+def cosine_distance(image_embeds, text_embeds):
+    normalized_image_embeds = nn.functional.normalize(image_embeds)
+    normalized_text_embeds = nn.functional.normalize(text_embeds)
+    return torch.mm(normalized_image_embeds, normalized_text_embeds.t())
+class StableDiffusionSafetyChecker(PreTrainedModel):
+    config_class = CLIPConfig
+    _no_split_modules = ["CLIPEncoderLayer"]
+    def __init__(self, config: CLIPConfig):
+        super().__init__(config)
+        self.vision_model = CLIPVisionModel(config.vision_config)
+        self.visual_projection = nn.Linear(
+            config.vision_config.hidden_size, config.projection_dim, bias=False
+        )
+        self.concept_embeds = nn.Parameter(
+            torch.ones(17, config.projection_dim), requires_grad=False
+        )
+        self.special_care_embeds = nn.Parameter(
+            torch.ones(3, config.projection_dim), requires_grad=False
+        )
+        self.concept_embeds_weights = nn.Parameter(torch.ones(17), requires_grad=False)
+        self.special_care_embeds_weights = nn.Parameter(
+            torch.ones(3), requires_grad=False
+        )
+    @torch.no_grad()
+    def forward(self, clip_input, images):
+        pooled_output = self.vision_model(clip_input)[1]  # pooled_output
+        image_embeds = self.visual_projection(pooled_output)
+        # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
+        special_cos_dist = (
+            cosine_distance(image_embeds, self.special_care_embeds)
+            .cpu()
+            .float()
+            .numpy()
+        )
+        cos_dist = (
+            cosine_distance(image_embeds, self.concept_embeds).cpu().float().numpy()
+        )
+        result = []
+        batch_size = image_embeds.shape[0]
+        for i in range(batch_size):
+            result_img = {
+                "special_scores": {},
+                "special_care": [],
+                "concept_scores": {},
+                "bad_concepts": [],
+            }
+            # increase this value to create a stronger `nsfw` filter
+            # at the cost of increasing the possibility of filtering benign images
+            adjustment = 0.0
+            for concept_idx in range(len(special_cos_dist[0])):
+                concept_cos = special_cos_dist[i][concept_idx]
+                concept_threshold = self.special_care_embeds_weights[concept_idx].item()
+                result_img["special_scores"][concept_idx] = round(
+                    concept_cos - concept_threshold + adjustment, 3
+                )
+                if result_img["special_scores"][concept_idx] > 0:
+                    result_img["special_care"].append(
+                        {concept_idx, result_img["special_scores"][concept_idx]}
+                    )
+                    adjustment = 0.01
+            for concept_idx in range(len(cos_dist[0])):
+                concept_cos = cos_dist[i][concept_idx]
+                concept_threshold = self.concept_embeds_weights[concept_idx].item()
+                result_img["concept_scores"][concept_idx] = round(
+                    concept_cos - concept_threshold + adjustment, 3
+                )
+                if result_img["concept_scores"][concept_idx] > 0:
+                    result_img["bad_concepts"].append(concept_idx)
+            result.append(result_img)
+        has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result]
+        return has_nsfw_concepts
+    @torch.no_grad()
+    def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor):
+        pooled_output = self.vision_model(clip_input)[1]  # pooled_output
+        image_embeds = self.visual_projection(pooled_output)
+        special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds)
+        cos_dist = cosine_distance(image_embeds, self.concept_embeds)
+        # increase this value to create a stronger `nsfw` filter
+        # at the cost of increasing the possibility of filtering benign images
+        adjustment = 0.0
+        special_scores = (
+            special_cos_dist - self.special_care_embeds_weights + adjustment
+        )
+        # special_scores = special_scores.round(decimals=3)
+        special_care = torch.any(special_scores > 0, dim=1)
+        special_adjustment = special_care * 0.01
+        special_adjustment = special_adjustment.unsqueeze(1).expand(
+            -1, cos_dist.shape[1]
+        )
+        concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment
+        # concept_scores = concept_scores.round(decimals=3)
+        has_nsfw_concepts = torch.any(concept_scores > 0, dim=1)
+        images[has_nsfw_concepts] = 0.0  # black image
+        return images, has_nsfw_concepts