add controlnet inpaint
- app.py +79 -1
- preprocessor.py +84 -0
- requirements.txt +3 -1
app.py
CHANGED
@@ -20,6 +20,8 @@ from io import BytesIO
 from datetime import datetime
 from diffusers.utils import load_image
 import json
+from preprocessor import Preprocessor
+from diffusers.pipelines.flux.pipeline_flux_controlnet_inpaint import FluxControlNetInpaintPipeline

 HF_TOKEN = os.environ.get("HF_TOKEN")

@@ -33,9 +35,27 @@ dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 base_model = "black-forest-labs/FLUX.1-dev"

+controlnet_model = 'InstantX/FLUX.1-dev-Controlnet-Union-alpha'
+controlnet = FluxControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16)
+
 taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
 good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype).to(device)
-pipe =
+pipe = FluxControlNetInpaintPipeline.from_pretrained(base_model, controlnet=controlnet, torch_dtype=dtype, vae=taef1).to(device)
+
+control_mode_ids = {
+    "scribble_hed": 0,
+    "canny": 0,  # supported
+    "mlsd": 0,  # supported
+    "tile": 1,  # supported
+    "depth_midas": 2,  # supported
+    "blur": 3,  # supported
+    "openpose": 4,  # supported
+    "gray": 5,  # supported
+    "low_quality": 6,  # supported
+}
+


 class calculateDuration:
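For context: the InstantX Union ControlNet multiplexes several conditioning types through one checkpoint, selected by an integer control_mode, which is what the control_mode_ids table above encodes. Below is a minimal calling sketch, assuming the pinned diffusers fork exposes FluxControlNetInpaintPipeline with the keyword arguments this commit passes in run_flux; init_image, mask, and canny_map are hypothetical placeholders.

# Sketch, not app code: pass a preprocessed control image plus its
# Union mode id alongside the usual inpainting inputs.
result = pipe(
    prompt="a red leather sofa",
    image=init_image,                        # image to inpaint
    mask_image=mask,                         # white pixels get repainted
    control_image=canny_map,                 # matching preprocessor output
    control_mode=control_mode_ids["canny"],  # 0 in the table above
    width=1024,
    height=1024,
).images[0]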
@@ -129,6 +149,8 @@ def upload_image_to_r2(image, account_id, access_key, secret_key, bucket_name):
 def run_flux(
     image: Image.Image,
     mask: Image.Image,
+    control_image: Image.Image,
+    control_mode: int,
     prompt: str,
     lora_path: str,
     lora_weights: str,
@@ -157,6 +179,8 @@ def run_flux(
         prompt=prompt,
         image=image,
         mask_image=mask,
+        control_image=control_image,
+        control_mode=control_mode,
         width=width,
         height=height,
         strength=strength_slider,
@@ -175,6 +199,7 @@ def process(
     inpainting_prompt_text: str,
     mask_inflation_slider: int,
     mask_blur_slider: int,
+    control_mode: str,
     seed_slicer: int,
     randomize_seed_checkbox: bool,
     strength_slider: float,
@@ -217,10 +242,58 @@ def process(
     mask = mask.resize((width, height), Image.LANCZOS)
     mask = process_mask(mask, mask_inflation=mask_inflation_slider, mask_blur=mask_blur_slider)

+
+    # generate the control image for the selected ControlNet mode
+    with calculateDuration("Preprocessor Image"):
+        print("start to generate control image")
+        preprocessor = Preprocessor()
+        if control_mode == "depth_midas":
+            preprocessor.load("Midas")
+            control_image = preprocessor(
+                image=image,
+                image_resolution=width,
+                detect_resolution=512,
+            )
+        if control_mode == "openpose":
+            preprocessor.load("Openpose")
+            control_image = preprocessor(
+                image=image,
+                hand_and_face=True,
+                image_resolution=width,
+                detect_resolution=512,
+            )
+        if control_mode == "canny":
+            preprocessor.load("Canny")
+            control_image = preprocessor(
+                image=image,
+                image_resolution=width,
+                detect_resolution=512,
+            )
+
+        if control_mode == "mlsd":
+            preprocessor.load("MLSD")
+            control_image = preprocessor(
+                image=image,
+                image_resolution=width,
+                detect_resolution=512,
+            )
+
+        if control_mode == "scribble_hed":
+            # Preprocessor.load() currently supports Midas/MLSD/Openpose/Canny
+            # only, so "HED" still needs to be wired up in preprocessor.py
+            preprocessor.load("HED")
+            control_image = preprocessor(
+                image=image,
+                image_resolution=width,
+                detect_resolution=512,
+            )
+
+    control_mode_id = control_mode_ids[control_mode]
+
     try:
         generated_image = run_flux(
             image=image,
             mask=mask,
+            control_image=control_image,
+            control_mode=control_mode_id,
             prompt=inpainting_prompt_text,
             lora_path=lora_path,
             lora_scale=lora_scale,
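The five near-duplicate branches above are easy to let drift apart (the mlsd and scribble_hed arms originally referenced variables that do not exist inside process). A table-driven dispatch is one way to keep them consistent; this is a sketch only, assuming the same Preprocessor interface added in preprocessor.py below.

# Sketch: map each UI mode to its annotator name and extra kwargs.
# Modes without an annotator (tile, blur, gray, low_quality) would be
# handled separately.
PREPROCESSOR_FOR_MODE = {
    "depth_midas": ("Midas", {}),
    "openpose": ("Openpose", {"hand_and_face": True}),
    "canny": ("Canny", {}),
    "mlsd": ("MLSD", {}),
}

def make_control_image(image, control_mode, width):
    name, extra_kwargs = PREPROCESSOR_FOR_MODE[control_mode]
    preprocessor = Preprocessor()
    preprocessor.load(name)
    return preprocessor(
        image=image,
        image_resolution=width,
        detect_resolution=512,
        **extra_kwargs,
    )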
@@ -275,6 +348,10 @@ with gr.Blocks() as demo:
                 placeholder="Enter text to generate inpainting",
                 container=False,
             )
+
+            control_mode = gr.Dropdown(
+                ["canny", "depth_midas", "openpose", "mlsd", "low_quality", "gray", "blur", "tile"],
+                label="Controlnet Model", info="choose a ControlNet mode", value="canny",
+            )

             submit_button_component = gr.Button(value='Submit', variant='primary', scale=0)

@@ -382,6 +459,7 @@ with gr.Blocks() as demo:
             inpainting_prompt_text_component,
             mask_inflation_slider_component,
             mask_blur_slider_component,
+            control_mode,
             seed_slicer_component,
             randomize_seed_checkbox_component,
             strength_slider_component,
preprocessor.py
ADDED
@@ -0,0 +1,84 @@
+import gc
+
+import numpy as np
+import PIL.Image
+import torch
+import torchvision
+from controlnet_aux import (
+    CannyDetector,
+    ContentShuffleDetector,
+    HEDdetector,
+    LineartAnimeDetector,
+    LineartDetector,
+    MidasDetector,
+    MLSDdetector,
+    NormalBaeDetector,
+    OpenposeDetector,
+    PidiNetDetector,
+)
+from controlnet_aux.util import HWC3
+
+from cv_utils import resize_image
+from depth_estimator import DepthEstimator
+from image_segmentor import ImageSegmentor
+
+from kornia.core import Tensor
+
+# load preprocessors
+
+# HED = HEDdetector.from_pretrained("lllyasviel/Annotators")
+Midas = MidasDetector.from_pretrained("lllyasviel/Annotators")
+MLSD = MLSDdetector.from_pretrained("lllyasviel/Annotators")
+Canny = CannyDetector()
+OPENPOSE = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
+
+
+class Preprocessor:
+    MODEL_ID = "lllyasviel/Annotators"
+
+    def __init__(self):
+        self.model = None
+        self.name = ""
+
+    def load(self, name: str) -> None:
+        if name == self.name:
+            return
+
+        if name == "Midas":
+            self.model = Midas
+        elif name == "MLSD":
+            self.model = MLSD
+        elif name == "Openpose":
+            self.model = OPENPOSE
+        elif name == "Canny":
+            self.model = Canny
+        else:
+            raise ValueError(f"unsupported preprocessor: {name}")
+        torch.cuda.empty_cache()
+        gc.collect()
+        self.name = name
+
+    def __call__(self, image: PIL.Image.Image, **kwargs) -> PIL.Image.Image:
+        if self.name == "Canny" or self.name == "MLSD":
+            detect_resolution = kwargs.pop("detect_resolution")
+            image_resolution = kwargs.pop("image_resolution", 512)
+            image = np.array(image)
+            image = HWC3(image)
+            image = resize_image(image, resolution=detect_resolution)
+            image = self.model(image, **kwargs)
+            image = np.array(image)
+            image = HWC3(image)
+            image = resize_image(image, resolution=image_resolution)
+            return PIL.Image.fromarray(image).convert('RGB')
+
+        else:
+            detect_resolution = kwargs.pop("detect_resolution", 512)
+            image_resolution = kwargs.pop("image_resolution", 512)
+            image = np.array(image)
+            image = HWC3(image)
+            image = resize_image(image, resolution=detect_resolution)
+            image = self.model(image, **kwargs)
+            image = np.array(image)
+            image = HWC3(image)
+            image = resize_image(image, resolution=image_resolution)
+            return PIL.Image.fromarray(image)
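A quick usage sketch for the new class (input.png is a hypothetical file; the annotator weights download from lllyasviel/Annotators on first use):

from PIL import Image
from preprocessor import Preprocessor

preprocessor = Preprocessor()
preprocessor.load("Midas")          # swap the active annotator in place
depth = preprocessor(
    image=Image.open("input.png"),
    image_resolution=1024,          # resolution of the returned map
    detect_resolution=512,          # resolution fed to the detector
)
depth.save("depth.png")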
requirements.txt
CHANGED
@@ -16,4 +16,6 @@ requests
 git+https://github.com/mylovelycodes/diffusers.git
 boto3
 sentencepiece
-peft
+peft
+controlnet-aux
+kornia