1aurent committed
Commit
256da70
1 Parent(s): 0394c27
README.md CHANGED
@@ -1,11 +1,11 @@
 ---
-title: Ic Light
+title: Refiners IC-Light
 emoji: 👁
 colorFrom: gray
 colorTo: blue
 sdk: gradio
-sdk_version: 4.40.0
-app_file: app.py
+sdk_version: 5.1.0
+app_file: src/app.py
 pinned: false
 license: mit
 ---
examples/bunny.png ADDED
examples/chair.png ADDED
examples/plant.png ADDED
pyproject.toml ADDED
@@ -0,0 +1,3 @@
+
+[tool.ruff]
+line-length = 120
requirements.txt ADDED
@@ -0,0 +1,2 @@
+git+https://github.com/finegrain-ai/refiners@06204731093d8055e65b21b4da2ce586737d6ea4
+pillow-heif>=0.18.0
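
Note that refiners is pinned to an exact commit rather than a tagged release. A minimal sanity check, assuming the requirements have been installed, is that the pinned revision exposes the IC-Light adapter imported by src/utils.py below:

```python
# Sanity check (illustrative, not part of the commit): after
# `pip install -r requirements.txt`, the pinned refiners revision
# must provide the ICLight adapter that src/utils.py imports.
from refiners.foundationals.latent_diffusion.stable_diffusion_1.ic_light import ICLight

print(ICLight.__name__)  # prints "ICLight" if the pinned revision is installed
```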
src/app.py ADDED
@@ -0,0 +1,242 @@
+import gradio as gr  # pyright: ignore[reportMissingTypeStubs]
+import pillow_heif  # pyright: ignore[reportMissingTypeStubs]
+import spaces  # pyright: ignore[reportMissingTypeStubs]
+import torch
+from PIL import Image
+from refiners.fluxion.utils import manual_seed, no_grad
+
+from utils import LightingPreference, load_ic_light, resize_modulo_8
+
+pillow_heif.register_heif_opener()  # pyright: ignore[reportUnknownMemberType]
+pillow_heif.register_avif_opener()  # pyright: ignore[reportUnknownMemberType]
+
+TITLE = """
+# IC-Light with Refiners
+"""
+
+# initialize the IC-Light model on the CPU
+DEVICE_CPU = torch.device("cpu")
+DTYPE = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float32
+ic_light = load_ic_light(device=DEVICE_CPU, dtype=DTYPE)
+
+# "move" the model to the GPU; this is handled/intercepted by ZeroGPU
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ic_light.to(device=DEVICE, dtype=DTYPE)
+ic_light.device = DEVICE
+ic_light.dtype = DTYPE
+ic_light.solver = ic_light.solver.to(device=DEVICE, dtype=DTYPE)
+
+
+@spaces.GPU
+@no_grad()
+def process(
+    image: Image.Image,
+    light_pref: str,
+    prompt: str,
+    negative_prompt: str,
+    strength_first_pass: float,
+    strength_second_pass: float,
+    condition_scale: float,
+    num_inference_steps: int,
+    seed: int,
+) -> Image.Image:
+    assert image.mode == "RGBA"
+    assert 0 < strength_first_pass <= 1  # strictly positive: used as a divisor below
+    assert 0 < strength_second_pass <= 1
+    assert num_inference_steps > 0
+    assert seed >= 0
+
+    # set the seed
+    manual_seed(seed)
+
+    # resize image to ~768x768
+    image = resize_modulo_8(image, 768)
+
+    # split RGB and alpha channel
+    mask = image.getchannel("A")
+    image = image.convert("RGB")
+
+    # compute embeddings
+    clip_text_embedding = ic_light.compute_clip_text_embedding(text=prompt, negative_text=negative_prompt)
+    ic_light.set_ic_light_condition(image=image, mask=mask)
+
+    # get the light_pref_image
+    light_pref_image = LightingPreference.from_str(value=light_pref).get_init_image(
+        width=image.width,
+        height=image.height,
+        interval=(0.2, 0.8),
+    )
+
+    # if no light preference is provided, do a full-strength first pass
+    if light_pref_image is None:
+        x = torch.randn_like(ic_light._ic_light_condition)  # pyright: ignore[reportPrivateUsage]
+        strength_first_pass = 1.0
+    else:
+        x = ic_light.lda.image_to_latents(light_pref_image)
+        x = ic_light.solver.add_noise(x, noise=torch.randn_like(x), step=0)
+
+    # configure the first pass
+    num_steps = int(round(num_inference_steps / strength_first_pass))
+    first_step = int(num_steps * (1 - strength_first_pass))
+    ic_light.set_inference_steps(num_steps, first_step)
+
+    # first pass
+    for step in ic_light.steps:
+        x = ic_light(
+            x,
+            step=step,
+            clip_text_embedding=clip_text_embedding,
+            condition_scale=condition_scale,
+        )
+
+    # configure the second pass
+    num_steps = int(round(num_inference_steps / strength_second_pass))
+    first_step = int(num_steps * (1 - strength_second_pass))
+    ic_light.set_inference_steps(num_steps, first_step)
+
+    # initialize the latents
+    x = ic_light.solver.add_noise(x, noise=torch.randn_like(x), step=first_step)
+
+    # second pass
+    for step in ic_light.steps:
+        x = ic_light(
+            x,
+            step=step,
+            clip_text_embedding=clip_text_embedding,
+            condition_scale=condition_scale,
+        )
+
+    return ic_light.lda.latents_to_image(x)
+
+
+with gr.Blocks() as demo:
+    gr.Markdown(TITLE)
+
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="pil", label="Input Image", image_mode="RGBA")
+            run_button = gr.Button(value="Relight Image")
+        with gr.Column():
+            output_image = gr.Image(label="Result")
+
+    with gr.Accordion("Advanced Settings", open=True):
+        prompt = gr.Textbox(
+            label="Prompt",
+            placeholder="bright green neon light, best quality, highres",
+        )
+        neg_prompt = gr.Textbox(
+            label="Negative Prompt",
+            placeholder="worst quality, low quality, normal quality",
+        )
+        light_pref = gr.Radio(
+            choices=["None", "Left", "Right", "Top", "Bottom"],
+            label="Light direction preference",
+            value="None",
+        )
+        seed = gr.Slider(
+            label="Seed",
+            minimum=0,
+            maximum=100_000,
+            value=69_420,
+            step=1,
+        )
+        condition_scale = gr.Slider(
+            label="Condition scale",
+            minimum=0.5,
+            maximum=2,
+            value=1.25,
+            step=0.05,
+        )
+        num_inference_steps = gr.Slider(
+            label="Number of inference steps",
+            minimum=1,
+            maximum=50,
+            value=25,
+            step=1,
+        )
+        with gr.Row():
+            strength_first_pass = gr.Slider(
+                label="Strength of the first pass",
+                minimum=0.1,
+                maximum=1,
+                value=0.9,
+                step=0.1,
+            )
+            strength_second_pass = gr.Slider(
+                label="Strength of the second pass",
+                minimum=0.1,
+                maximum=1,
+                value=0.5,
+                step=0.1,
+            )
+
+    run_button.click(
+        fn=process,
+        inputs=[
+            input_image,
+            light_pref,
+            prompt,
+            neg_prompt,
+            strength_first_pass,
+            strength_second_pass,
+            condition_scale,
+            num_inference_steps,
+            seed,
+        ],
+        outputs=output_image,
+    )
+
+    gr.Examples(  # pyright: ignore[reportUnknownMemberType]
+        examples=[
+            [
+                "examples/plant.png",
+                "None",
+                "blue purple neon light, cyberpunk city background, high-quality professional studio photography, realistic soft lighting, HEIC, CR2, NEF",
+                "dirty, messy, worst quality, low quality, watermark, signature, jpeg artifacts, deformed, monochrome, black and white",
+                0.9,
+                0.5,
+                1.25,
+                25,
+                69_420,
+            ],
+            [
+                "examples/plant.png",
+                "Right",
+                "blue purple neon light, cyberpunk city background, high-quality professional studio photography, realistic soft lighting, HEIC, CR2, NEF",
+                "dirty, messy, worst quality, low quality, watermark, signature, jpeg artifacts, deformed, monochrome, black and white",
+                0.9,
+                0.5,
+                1.25,
+                25,
+                69_420,
+            ],
+            [
+                "examples/plant.png",
+                "Left",
+                "floor is blue ice cavern, stalactite, high-quality professional studio photography, realistic soft lighting, HEIC, CR2, NEF",
+                "dirty, messy, worst quality, low quality, watermark, signature, jpeg artifacts, deformed, monochrome, black and white",
+                0.9,
+                0.5,
+                1.25,
+                25,
+                69_420,
+            ],
+        ],
+        inputs=[
+            input_image,
+            light_pref,
+            prompt,
+            neg_prompt,
+            strength_first_pass,
+            strength_second_pass,
+            condition_scale,
+            num_inference_steps,
+            seed,
+        ],
+        outputs=output_image,
+        fn=process,
+        cache_examples="lazy",  # type: ignore
+        run_on_click=False,
+    )
+
+    demo.launch()
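
The scheduling arithmetic in process() mirrors img2img-style partial denoising: a pass of strength s over N requested steps stretches the schedule to round(N / s) steps and skips the first (1 - s) fraction of them, so roughly N denoising steps actually run. A minimal sketch of that arithmetic with the Space's default values (illustrative, not part of the commit):

```python
# Illustrative: the per-pass step scheduling used by process() above.
def pass_schedule(num_inference_steps: int, strength: float) -> tuple[int, int]:
    assert 0 < strength <= 1
    num_steps = int(round(num_inference_steps / strength))  # stretched schedule
    first_step = int(num_steps * (1 - strength))  # steps skipped at the start
    return num_steps, first_step

# with the defaults (25 steps, strengths 0.9 and 0.5):
print(pass_schedule(25, 0.9))  # (28, 2)  -> 26 steps run in the first pass
print(pass_schedule(25, 0.5))  # (50, 25) -> 25 steps run in the second pass
```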
src/utils.py ADDED
@@ -0,0 +1,112 @@
+from enum import Enum, auto
+
+import torch
+from huggingface_hub import (  # pyright: ignore[reportMissingTypeStubs]
+    hf_hub_download,  # pyright: ignore[reportUnknownVariableType]
+)
+from PIL import Image
+from refiners.fluxion.utils import load_from_safetensors, tensor_to_image
+from refiners.foundationals.clip import CLIPTextEncoderL
+from refiners.foundationals.latent_diffusion import SD1UNet
+from refiners.foundationals.latent_diffusion.stable_diffusion_1 import SD1Autoencoder
+from refiners.foundationals.latent_diffusion.stable_diffusion_1.ic_light import ICLight
+
+
+def load_ic_light(device: torch.device, dtype: torch.dtype) -> ICLight:
+    return ICLight(
+        patch_weights=load_from_safetensors(
+            path=hf_hub_download(
+                repo_id="refiners/sd15.ic_light.fc",
+                filename="model.safetensors",
+                revision="ea10b4403e97c786a98afdcbdf0e0fec794ea542",
+            ),
+        ),
+        unet=SD1UNet(in_channels=4, device=device, dtype=dtype).load_from_safetensors(
+            tensors_path=hf_hub_download(
+                repo_id="refiners/sd15.realistic_vision.v5_1.unet",
+                filename="model.safetensors",
+                revision="94f74be7adfd27bee330ea1071481c0254c29989",
+            )
+        ),
+        clip_text_encoder=CLIPTextEncoderL(device=device, dtype=dtype).load_from_safetensors(
+            tensors_path=hf_hub_download(
+                repo_id="refiners/sd15.realistic_vision.v5_1.text_encoder",
+                filename="model.safetensors",
+                revision="7f6fa1e870c8f197d34488e14b89e63fb8d7fd6e",
+            )
+        ),
+        lda=SD1Autoencoder(device=device, dtype=dtype).load_from_safetensors(
+            tensors_path=hf_hub_download(
+                repo_id="refiners/sd15.realistic_vision.v5_1.autoencoder",
+                filename="model.safetensors",
+                revision="99f089787a6e1a852a0992da1e286a19fcbbaa50",
+            )
+        ),
+        device=device,
+        dtype=dtype,
+    )
+
+
+def resize_modulo_8(
+    image: Image.Image,
+    size: int = 768,
+    resample: Image.Resampling | None = None,
+    on_short: bool = True,
+) -> Image.Image:
+    """
+    Resize an image, respecting its aspect ratio and ensuring both sides are multiples of 8.
+
+    The `on_short` parameter determines whether the resizing is based on the shortest side.
+    """
+    assert size % 8 == 0, "Size must be a multiple of 8 because this is the latent compression factor."
+    side_size = min(image.size) if on_short else max(image.size)
+    scale = size / (side_size * 8)
+    new_size = (int(image.width * scale) * 8, int(image.height * scale) * 8)
+    return image.resize(new_size, resample=resample or Image.Resampling.LANCZOS)
+
+
+class LightingPreference(str, Enum):
+    LEFT = auto()
+    RIGHT = auto()
+    TOP = auto()
+    BOTTOM = auto()
+    NONE = auto()
+
+    def get_init_image(self, width: int, height: int, interval: tuple[float, float] = (0.0, 1.0)) -> Image.Image | None:
+        """
+        Generate an image with a linear gradient based on the lighting preference.
+
+        In the original code, the interval is always (0.0, 1.0); we added it as a parameter to make the
+        function more flexible and allow for less contrasted images with a smaller interval.
+        See https://github.com/lllyasviel/IC-Light/blob/7886874/gradio_demo.py#L242
+        """
+        start, end = interval
+        match self:
+            case LightingPreference.LEFT:
+                tensor = torch.linspace(end, start, width).repeat(1, 1, height, 1)
+            case LightingPreference.RIGHT:
+                tensor = torch.linspace(start, end, width).repeat(1, 1, height, 1)
+            case LightingPreference.TOP:
+                tensor = torch.linspace(end, start, height).repeat(1, 1, width, 1).transpose(2, 3)
+            case LightingPreference.BOTTOM:
+                tensor = torch.linspace(start, end, height).repeat(1, 1, width, 1).transpose(2, 3)
+            case LightingPreference.NONE:
+                return None
+
+        return tensor_to_image(tensor).convert("RGB")
+
+    @classmethod
+    def from_str(cls, value: str) -> "LightingPreference":
+        match value.lower():
+            case "left":
+                return LightingPreference.LEFT
+            case "right":
+                return LightingPreference.RIGHT
+            case "top":
+                return LightingPreference.TOP
+            case "bottom":
+                return LightingPreference.BOTTOM
+            case "none":
+                return LightingPreference.NONE
+            case _:
+                raise ValueError(f"Invalid lighting preference: {value}")
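
A short usage sketch for these helpers, assuming the repository root as the working directory and src/ on sys.path, as when the Space runs (illustrative, not part of the commit):

```python
# Illustrative usage of the helpers defined in src/utils.py above.
from PIL import Image

from utils import LightingPreference, resize_modulo_8

# left preference: a horizontal gradient, brightest on the left; the (0.2, 0.8)
# interval softens the contrast compared to the original (0.0, 1.0)
gradient = LightingPreference.from_str("left").get_init_image(width=768, height=512, interval=(0.2, 0.8))
assert gradient is not None and gradient.size == (768, 512)

# "none" yields no init image, which makes app.py fall back to a full-strength first pass
assert LightingPreference.from_str("none").get_init_image(width=768, height=512) is None

# resizing keeps the aspect ratio and makes both sides multiples of 8
image = Image.open("examples/plant.png").convert("RGBA")
resized = resize_modulo_8(image, size=768)
assert resized.width % 8 == 0 and resized.height % 8 == 0
```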