Spaces:

m4r4k0s23
/

DiffModels

Sleeping

App Files Files Community

m4r4k0s23 committed on Feb 18

Commit

9db3da3

verified ·

1 Parent(s): a85e632

Update app.py

Browse files

Files changed (1) hide show

app.py +225 -159

app.py CHANGED Viewed

@@ -1,113 +1,122 @@
 import gradio as gr
 import numpy as np
 import random
-from diffusers import DiffusionPipeline
-from peft import PeftModel, PeftConfig
 import torch
-device = "cuda" if torch.cuda.is_available() else "cpu"
-# Model list including your LoRA model
-MODEL_LIST = [
-    "CompVis/stable-diffusion-v1-4",
-    "stabilityai/sdxl-turbo",
-    "runwayml/stable-diffusion-v1-5",
-    "stabilityai/stable-diffusion-2-1",
-    "m4r4k0s23/hw5_lora_raccoon",
-]
 if torch.cuda.is_available():
     torch_dtype = torch.float16
 else:
     torch_dtype = torch.float32
-# Cache to avoid re-initializing pipelines repeatedly
-model_cache = {}
-def load_pipeline(model_id: str):
-    """
-    Loads or retrieves a cached DiffusionPipeline.
-    If the chosen model is your LoRA adapter, then load the base model
-    (CompVis/stable-diffusion-v1-4) and apply the LoRA weights.
-    """
-    if model_id in model_cache:
-        return model_cache[model_id]
-    if model_id == "m4r4k0s23/hw5_lora_raccoon":
-        # Use the specified base model for your LoRA adapter.
-        base_model = "CompVis/stable-diffusion-v1-4"
-        pipe = DiffusionPipeline.from_pretrained(base_model, torch_dtype=torch_dtype)
-        # Load the LoRA weights
-        pipe.unet = PeftModel.from_pretrained(
-            pipe.unet,
-            model_id,
-            subfolder="unet",
-            torch_dtype=torch_dtype
-        )
-        pipe.text_encoder = PeftModel.from_pretrained(
-            pipe.text_encoder,
-            model_id,
-            subfolder="text_encoder",
-            torch_dtype=torch_dtype
-        )
-    else:
-        pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
-    pipe.to(device)
-    model_cache[model_id] = pipe
-    return pipe
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 def infer(
-    model_id,
     prompt,
     negative_prompt,
-    seed,
-    randomize_seed,
-    width,
-    height,
-    guidance_scale,
-    num_inference_steps,
-    lora_scale,  # New parameter for adjusting LoRA scale
-    progress=gr.Progress(track_tqdm=True),
-):
-    # Load the pipeline for the chosen model
-    pipe = load_pipeline(model_id)
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator(device=device).manual_seed(seed)
-    # If using the LoRA model, update the LoRA scale if supported.
-    if model_id == "m4r4k0s23/hw5_lora_raccoon":
-        # This assumes your pipeline's unet has a method to update the LoRA scale.
-        if hasattr(pipe.unet, "set_lora_scale"):
-            pipe.unet.set_lora_scale(lora_scale)
         else:
-            print("Warning: LoRA scale adjustment method not found on UNet.")
-    image = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        width=width,
-        height=height,
-        generator=generator,
-    ).images[0]
-    return image, seed
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]
 css = """
 #col-container {
@@ -116,55 +125,131 @@ css = """
 }
 """
-with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # Text-to-Image Gradio Template")
         with gr.Row():
-            # Dropdown to select the model from Hugging Face
-            model_id = gr.Dropdown(
-                label="Model",
-                choices=MODEL_LIST,
-                value=MODEL_LIST[0],  # Default model
-            )
-        with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-            run_button = gr.Button("Run", scale=0, variant="primary")
-        result = gr.Image(label="Result", show_label=False)
-        with gr.Accordion("Advanced Settings", open=False):
-            negative_prompt = gr.Text(
-                label="Negative prompt",
                 max_lines=1,
-                placeholder="Enter a negative prompt",
             )
-            seed = gr.Slider(
                 label="Seed",
                 minimum=0,
                 maximum=MAX_SEED,
                 step=1,
-                value=42,  # Default seed
             )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=1024,
                 )
                 height = gr.Slider(
@@ -172,54 +257,35 @@ with gr.Blocks(css=css) as demo:
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=1024,
                 )
-            with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=20.0,
-                    step=0.5,
-                    value=7.0,
-                )
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=100,
-                    step=1,
-                    value=20,
-                )
-            # New slider for LoRA scale.
-            lora_scale = gr.Slider(
-                label="LoRA Scale",
-                minimum=0.0,
-                maximum=2.0,
-                step=0.1,
-                value=1.0,
-                info="Adjust the influence of the LoRA weights",
-            )
-        gr.Examples(examples=examples, inputs=[prompt])
     gr.on(
-        triggers=[run_button.click, prompt.submit],
         fn=infer,
         inputs=[
-            model_id,
             prompt,
             negative_prompt,
-            seed,
-            randomize_seed,
             width,
             height,
-            guidance_scale,
             num_inference_steps,
-            lora_scale,  # Pass the new slider value
         ],
-        outputs=[result, seed],
     )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import numpy as np
 import random
+import os
 import torch
+from diffusers import StableDiffusionPipeline, ControlNetModel, StableDiffusionControlNetPipeline
+from diffusers.utils import load_image
+from peft import PeftModel, LoraConfig
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model_id_default = "stable-diffusion-v1-5/stable-diffusion-v1-5"
 if torch.cuda.is_available():
     torch_dtype = torch.float16
 else:
     torch_dtype = torch.float32
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
+# @spaces.GPU #[uncomment to use ZeroGPU]
 def infer(
     prompt,
     negative_prompt,
+    width=512,
+    height=512,
+    model_id=model_id_default,
+    seed=42,
+    guidance_scale=7.0,
+    lora_scale=1.0,
+    num_inference_steps=20,
+    controlnet_checkbox=False,
+    controlnet_strength=0.0,
+    controlnet_mode="edge_detection",
+    controlnet_image=None,
+    ip_adapter_checkbox=False,
+    ip_adapter_scale=0.0,
+    ip_adapter_image=None,
+    progress=gr.Progress(track_tqdm=True),
+):
+    unet_sub_dir = "unet"
+    text_encoder_sub_dir = "text_encoder"
+    if model_id is None:
+        raise ValueError("Please specify the base model name or path")
+    generator = torch.Generator(device).manual_seed(seed)
+    params = {'prompt': prompt,
+              'negative_prompt': negative_prompt,
+              'guidance_scale': guidance_scale,
+              'num_inference_steps': num_inference_steps,
+              'width': width,
+              'height': height,
+              'generator': generator
+             }
+    if controlnet_checkbox:
+        if controlnet_mode == "depth_map":
+            controlnet = ControlNetModel.from_pretrained(
+                "lllyasviel/sd-controlnet-depth",
+                cache_dir="./models_cache",
+                torch_dtype=torch_dtype
+            )
+        elif controlnet_mode == "pose_estimation":
+            controlnet = ControlNetModel.from_pretrained(
+                "lllyasviel/sd-controlnet-openpose",
+                cache_dir="./models_cache",
+                torch_dtype=torch_dtype
+            )
+        elif controlnet_mode == "normal_map":
+            controlnet = ControlNetModel.from_pretrained(
+                "lllyasviel/sd-controlnet-normal",
+                cache_dir="./models_cache",
+                torch_dtype=torch_dtype
+            )
+        elif controlnet_mode == "scribbles":
+            controlnet = ControlNetModel.from_pretrained(
+                "lllyasviel/sd-controlnet-scribble",
+                cache_dir="./models_cache",
+                torch_dtype=torch_dtype
+            )
         else:
+            controlnet = ControlNetModel.from_pretrained(
+                "lllyasviel/sd-controlnet-canny",
+                cache_dir="./models_cache",
+                torch_dtype=torch_dtype
+            )
+        pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id,
+                                                                 controlnet=controlnet,
+                                                                 torch_dtype=torch_dtype,
+                                                                 safety_checker=None).to(device)
+        params['image'] = controlnet_image
+        params['controlnet_conditioning_scale'] = float(controlnet_strength)
+    else:
+        pipe = StableDiffusionPipeline.from_pretrained(model_id,
+                                                       torch_dtype=torch_dtype,
+                                                       safety_checker=None).to(device)
+    pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir)
+    pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir)
+    pipe.unet.load_state_dict({k: lora_scale*v for k, v in pipe.unet.state_dict().items()})
+    pipe.text_encoder.load_state_dict({k: lora_scale*v for k, v in pipe.text_encoder.state_dict().items()})
+    if torch_dtype in (torch.float16, torch.bfloat16):
+        pipe.unet.half()
+        pipe.text_encoder.half()
+    if ip_adapter_checkbox:
+        pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-plus_sd15.bin")
+        pipe.set_ip_adapter_scale(ip_adapter_scale)
+        params['ip_adapter_image'] = ip_adapter_image
+    pipe.to(device)
+    return pipe(**params).images[0]
 css = """
 #col-container {
 }
 """
+def controlnet_params(show_extra):
+    return gr.update(visible=show_extra)
+with gr.Blocks(css=css, fill_height=True) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown(" # Text-to-Image demo")
         with gr.Row():
+            model_id = gr.Textbox(
+                label="Model ID",
                 max_lines=1,
+                placeholder="Enter model id",
+                value=model_id_default,
             )
+        prompt = gr.Textbox(
+            label="Prompt",
+            max_lines=1,
+            placeholder="Enter your prompt",
+        )
+        negative_prompt = gr.Textbox(
+            label="Negative prompt",
+            max_lines=1,
+            placeholder="Enter your negative prompt",
+        )
+        with gr.Row():
+            seed = gr.Number(
                 label="Seed",
                 minimum=0,
                 maximum=MAX_SEED,
                 step=1,
+                value=42,
+            )
+            guidance_scale = gr.Slider(
+                label="Guidance scale",
+                minimum=0.0,
+                maximum=30.0,
+                step=0.1,
+                value=7.0,  # Replace with defaults that work for your model
+            )
+        with gr.Row():
+            lora_scale = gr.Slider(
+                label="LoRA scale",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=1.0,
             )
+            num_inference_steps = gr.Slider(
+                label="Number of inference steps",
+                minimum=1,
+                maximum=100,
+                step=1,
+                value=20,  # Replace with defaults that work for your model
+            )
+        with gr.Row():
+            controlnet_checkbox = gr.Checkbox(
+                label="ControlNet",
+                value=False
+            )
+            with gr.Column(visible=False) as controlnet_params:
+                controlnet_strength = gr.Slider(
+                    label="ControlNet conditioning scale",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.01,
+                    value=1.0,
+                )
+                controlnet_mode = gr.Dropdown(
+                    label="ControlNet mode",
+                    choices=["edge_detection",
+                             "depth_map",
+                             "pose_estimation",
+                             "normal_map",
+                             "scribbles"],
+                    value="edge_detection",
+                    max_choices=1
+                )
+                controlnet_image = gr.Image(
+                    label="ControlNet condition image",
+                    type="pil",
+                    format="png"
+                )
+            controlnet_checkbox.change(
+                fn=lambda x: gr.Row.update(visible=x),
+                inputs=controlnet_checkbox,
+                outputs=controlnet_params
+            )
+        with gr.Row():
+            ip_adapter_checkbox = gr.Checkbox(
+                label="IPAdapter",
+                value=False
+            )
+            with gr.Column(visible=False) as ip_adapter_params:
+                ip_adapter_scale = gr.Slider(
+                    label="IPAdapter scale",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.01,
+                    value=1.0,
+                )
+                ip_adapter_image = gr.Image(
+                    label="IPAdapter condition image",
+                    type="pil"
+                )
+            ip_adapter_checkbox.change(
+                fn=lambda x: gr.Row.update(visible=x),
+                inputs=ip_adapter_checkbox,
+                outputs=ip_adapter_params
+            )
+        with gr.Accordion("Optional Settings", open=False):
             with gr.Row():
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
+                    value=512,  # Replace with defaults that work for your model
                 )
                 height = gr.Slider(
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
+                    value=512,  # Replace with defaults that work for your model
                 )
+        run_button = gr.Button("Run", scale=0, variant="primary")
+        result = gr.Image(label="Result", show_label=False)
     gr.on(
+        triggers=[run_button.click],
         fn=infer,
         inputs=[
             prompt,
             negative_prompt,
             width,
             height,
+            model_id,
+            seed,
+            guidance_scale,
+            lora_scale,
             num_inference_steps,
+            controlnet_checkbox,
+            controlnet_strength,
+            controlnet_mode,
+            controlnet_image,
+            ip_adapter_checkbox,
+            ip_adapter_scale,
+            ip_adapter_image,
         ],
+        outputs=[result],
     )
 if __name__ == "__main__":
+    demo.launch()