king159 committed on
Commit e4924c3
1 Parent(s): 771cc14
.gitattributes CHANGED
@@ -32,4 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+__pycache__
app.py CHANGED
@@ -32,11 +32,13 @@ article = r"""
 <br>
 If you found this demo/our paper useful, please consider citing:
 ```bibtex
-@article{he024paid,
-  title={PAID:(Prompt-guided) Attention Interpolation of Text-to-Image Diffusion},
-  author={He, Qiyuan and Wang, Jinghao and Liu, Ziwei and Angle, Yao},
-  journal={},
-  year={2024}
+@misc{he2024aid,
+  title={AID: Attention Interpolation of Text-to-Image Diffusion},
+  author={Qiyuan He and Jinghao Wang and Ziwei Liu and Angela Yao},
+  year={2024},
+  eprint={2403.17924},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
 }
 ```
 📧 **Contact**
@@ -50,18 +52,17 @@ USE_TORCH_COMPILE = False
 ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD") == "1"
 PREVIEW_IMAGES = False
 
-dtype = torch.float32
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 pipeline = InterpolationStableDiffusionPipeline(
     repo_name="runwayml/stable-diffusion-v1-5",
     guidance_scale=10.0,
     scheduler_name="unipc",
 )
-pipeline.to(device, dtype=dtype)
+pipeline.to(device, dtype=torch.float32)
 
 
 def change_model_fn(model_name: str) -> None:
-    global pipeline
+    global device
     name_mapping = {
         "SD1.4-521": "CompVis/stable-diffusion-v1-4",
         "SD1.5-512": "runwayml/stable-diffusion-v1-5",
@@ -69,17 +70,21 @@ def change_model_fn(model_name: str) -> None:
         "SDXL-1024": "stabilityai/stable-diffusion-xl-base-1.0",
     }
     if "XL" not in model_name:
-        pipeline = InterpolationStableDiffusionPipeline(
+        globals()["pipeline"] = InterpolationStableDiffusionPipeline(
             repo_name=name_mapping[model_name],
             guidance_scale=10.0,
             scheduler_name="unipc",
         )
-        pipeline.to(device, dtype=dtype)
+        globals()["pipeline"].to(device, dtype=torch.float32)
     else:
-        pipeline = InterpolationStableDiffusionXLPipeline.from_pretrained(
-            name_mapping[model_name]
+        if device == torch.device("cpu"):
+            dtype = torch.float32
+        else:
+            dtype = torch.float16
+        globals()["pipeline"] = InterpolationStableDiffusionXLPipeline.from_pretrained(
+            name_mapping[model_name], torch_dtype=dtype
         )
-        pipeline.to(device, dtype=dtype)
+        globals()["pipeline"].to(device)
 
 
 def save_image(img, index):
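Inside `change_model_fn`, a bare `pipeline = ...` would only bind a function-local name and leave the module-level `pipeline` (the one the generate path reads) untouched; writing through `globals()` rebinds the module-level name, which is why the old `global pipeline` declaration could be dropped. A self-contained sketch of the pattern:

```python
pipeline = "SD1.5"  # stands in for the loaded diffusion pipeline

def change_model(name: str) -> None:
    # A plain `pipeline = name` here would create a local variable.
    # Writing through globals() rebinds the module-level name instead,
    # equivalent to declaring `global pipeline` before assigning.
    globals()["pipeline"] = name

change_model("SDXL")
print(pipeline)  # -> "SDXL"
```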
@@ -107,7 +112,7 @@ def plot_gemma_fn(alpha: float, beta: float, size: int) -> pd.DataFrame:
     )
 
 
-def get_example() -> list:
+def get_example() -> list[list[str | float | int]]:
     case = [
         [
             "A photo of dog, best quality, extremely detailed",
@@ -115,7 +120,7 @@ def get_example() -> list:
             3,
             6,
             3,
-            "A photo of a dog driving a car, logical, best quality, extremely detailed",
+            "A car with dog furry texture, best quality, extremely detailed",
             "monochrome, lowres, bad anatomy, worst quality, low quality",
             "SD1.5-512",
             6.1 / 50,
@@ -125,11 +130,52 @@ def get_example() -> list:
             "self",
             1002,
             True,
-        ]
+        ],
+        [
+            "A photo of dog, best quality, extremely detailed",
+            "A photo of car, best quality, extremely detailed",
+            7,
+            8,
+            8,
+            "A toy named dog-car, best quality, extremely detailed",
+            "monochrome, lowres, bad anatomy, worst quality, low quality",
+            "SD1.5-512",
+            8.1 / 50,
+            10,
+            50,
+            "fused_inner",
+            "self",
+            1002,
+            True,
+        ],
+        [
+            "anime artwork a Pokemon called Pikachu sitting on the grass, dramatic, anime style, key visual, vibrant, studio anime, highly detailed",
+            "anime artwork a beautiful girl, dramatic, anime style, key visual, vibrant, studio anime, highly detailed",
+            7,
+            3,
+            3,
+            None,
+            "monochrome, lowres, bad anatomy, worst quality, low quality",
+            "SDXL-1024",
+            25 / 50,
+            10,
+            50,
+            "fused_outer",
+            "self",
+            1002,
+            False,
+        ],
     ]
     return case
 
 
+def change_generate_button_fn(enable: int) -> gr.Button:
+    if enable == 0:
+        return gr.Button(interactive=False, value="Switching Model...")
+    else:
+        return gr.Button(interactive=True, value="Generate")
+
+
 def dynamic_gallery_fn(interpolation_size: int):
 
     return gr.Gallery(
@@ -192,6 +238,9 @@ def generate(
             negative_prompt=negative_prompt,
             guidance_scale=guidance_scale,
         )
+        if hasattr(images, "images"):
+            # for sdxl
+            images = np.array(images.images)
         if interpolation_size == 3:
             final_images = images
             break
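The `hasattr(images, "images")` guard bridges two return types: the custom SD pipeline yields a numpy array directly, while the diffusers-style SDXL pipeline returns an output object whose `.images` field is a list of PIL images. The same normalization as a helper, sketched here under that assumption (the helper name is ours):

```python
import numpy as np

def to_image_array(result) -> np.ndarray:
    """Normalize pipeline output to an array of images."""
    if hasattr(result, "images"):       # diffusers-style output object (SDXL path)
        return np.array(result.images)  # list of PIL images -> array
    return np.asarray(result)           # already array-like (SD1.x path)
```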
@@ -206,7 +255,7 @@
 
 interpolation_size = None
 
-with gr.Blocks() as demo:
+with gr.Blocks(css="style.css") as demo:
     gr.Markdown(title)
     gr.Markdown(description)
     with gr.Group():
@@ -225,7 +274,7 @@ with gr.Blocks() as demo:
                 value="A photo of car, best quality, extremely detaile",
             )
         result = gr.Gallery(label="Result", show_label=False, rows=1, columns=3)
-        generate_button = gr.Button("Generate", variant="primary")
+        generate_button = gr.Button(value="Generate", variant="primary")
     with gr.Accordion("Advanced options", open=True):
         with gr.Group():
             with gr.Row():
@@ -242,14 +291,14 @@ with gr.Blocks() as demo:
                     label="alpha",
                     minimum=1,
                     maximum=50,
-                    step=0.1,
+                    step=1,
                     value=6.0,
                 )
                 beta = gr.Slider(
                     label="beta",
                     minimum=1,
                     maximum=50,
-                    step=0.1,
+                    step=1,
                     value=3.0,
                 )
                 gamma_plot = gr.LinePlot(
@@ -346,6 +395,7 @@ with gr.Blocks() as demo:
                     label="Model",
                     value="SD1.5-512",
                     interactive=True,
+                    info="SDXL will run on float16 while the rest will run on float32.",
                 )
             with gr.Column():
                 seed = gr.Slider(
@@ -381,8 +431,6 @@ with gr.Blocks() as demo:
             seed,
             same_latent,
         ],
-        outputs=result,
-        fn=generate,
         cache_examples=CACHE_EXAMPLES,
     )
 
@@ -395,7 +443,15 @@ with gr.Blocks() as demo:
     interpolation_size.change(
         fn=plot_gemma_fn, inputs=[alpha, beta, interpolation_size], outputs=gamma_plot
     )
-    model_choice.change(fn=change_model_fn, inputs=model_choice)
+    model_choice.change(
+        fn=change_generate_button_fn,
+        inputs=gr.Number(0, visible=False),
+        outputs=generate_button,
+    ).then(fn=change_model_fn, inputs=model_choice).then(
+        fn=change_generate_button_fn,
+        inputs=gr.Number(1, visible=False),
+        outputs=generate_button,
+    )
     inputs = [
         prompt1,
         prompt2,
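The chained `model_choice.change(...).then(...).then(...)` implements a lock/swap/unlock cycle so users cannot click Generate mid-switch. A self-contained sketch of the pattern, assuming a Gradio version where event listeners return a chainable dependency and a component returned from a callback replaces the one named in `outputs` (the repo relies on the same behavior):

```python
import time
import gradio as gr

def lock() -> gr.Button:
    return gr.Button(interactive=False, value="Switching Model...")

def switch_model(name: str) -> None:
    time.sleep(2)  # stands in for reloading a diffusion pipeline

def unlock() -> gr.Button:
    return gr.Button(interactive=True, value="Generate")

with gr.Blocks() as demo:
    choice = gr.Dropdown(["SD1.5-512", "SDXL-1024"], value="SD1.5-512")
    button = gr.Button("Generate", variant="primary")
    # Each .then() step starts only after the previous one finishes.
    choice.change(fn=lock, outputs=button).then(
        fn=switch_model, inputs=choice
    ).then(fn=unlock, outputs=button)

demo.launch()
```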
@@ -423,9 +479,4 @@ with gr.Blocks() as demo:
     )
     gr.Markdown(article)
 
-with gr.Blocks(css="style.css") as demo_with_history:
-    with gr.Tab("App"):
-        demo.render()
-
-if __name__ == "__main__":
-    demo_with_history.queue(max_size=20).launch()
+demo.launch()
pipeline_interpolated_sdxl.py CHANGED
@@ -403,6 +403,12 @@ class InterpolationStableDiffusionXLPipeline(
         else:
             self.watermark = None
 
+    def to(self, *args, **kwargs):
+        super().to(*args, **kwargs)
+        self.vae.to(*args, **kwargs)
+        self.text_encoder.to(*args, **kwargs)
+        self.unet.to(*args, **kwargs)
+
     def generate_latent(
         self, generator: Optional[torch.Generator] = None, torch_device: str = "cpu"
     ) -> torch.FloatTensor:
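The `to()` override forwards device/dtype moves to each submodule explicitly, guaranteeing `vae`, `text_encoder`, and `unet` always travel together. A toy sketch of the forwarding idea (toy modules, not the real pipeline):

```python
import torch
import torch.nn as nn

class ToyPipeline:
    """Toy stand-in that forwards .to() to every submodule it owns."""

    def __init__(self) -> None:
        self.vae = nn.Linear(4, 4)
        self.text_encoder = nn.Linear(4, 4)
        self.unet = nn.Linear(4, 4)

    def to(self, *args, **kwargs) -> "ToyPipeline":
        for module in (self.vae, self.text_encoder, self.unet):
            module.to(*args, **kwargs)
        return self

pipe = ToyPipeline().to(torch.float16)
assert pipe.unet.weight.dtype is torch.float16
```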
pipeline_interpolated_stable_diffusion.py CHANGED
@@ -286,7 +286,7 @@ class InterpolationStableDiffusionPipeline:
             noise_pred = self.unet(
                 latent_model_input, t, encoder_hidden_states=embs
             ).sample
-            attn_proc = AttnProcessor()
+            attn_proc = AttnProcessor2_0()
             self.unet.set_attn_processor(processor=attn_proc)
             noise_uncond = self.unet(
                 latent_model_input, t, encoder_hidden_states=uncond_embs
@@ -477,7 +477,7 @@ class InterpolationStableDiffusionPipeline:
                 t=it,
                 is_fused=True,
             )
-            self_attn_proc = AttnProcessor()
+            self_attn_proc = AttnProcessor2_0()
             procs_dict = {
                 "pure_inner": pure_inner_attn_proc,
                 "fused_inner": fused_inner_attn_proc,
@@ -503,7 +503,7 @@ class InterpolationStableDiffusionPipeline:
             noise_pred = self.unet(
                 latent_model_input, t, encoder_hidden_states=embs
             ).sample
-            attn_proc = AttnProcessor()
+            attn_proc = AttnProcessor2_0()
             self.unet.set_attn_processor(processor=attn_proc)
             noise_uncond = self.unet(
                 latent_model_input, t, encoder_hidden_states=uncond_embs
@@ -544,7 +544,7 @@ class InterpolationStableDiffusionPipeline:
         Returns:
             numpy.ndarray: The interpolated images.
         """
-        self.unet.set_attn_processor(processor=AttnProcessor())
+        self.unet.set_attn_processor(processor=AttnProcessor2_0())
         start_emb = self.prompt_to_embedding(text_1)
         end_emb = self.prompt_to_embedding(text_2)
         neg_emb = self.prompt_to_embedding(negative_prompt)
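`AttnProcessor2_0` is the diffusers processor backed by PyTorch 2.0's `torch.nn.functional.scaled_dot_product_attention`, so resetting to it instead of the vanilla `AttnProcessor` keeps the fused attention kernels wherever the interpolation processors are cleared. A hedged sketch of a version-safe way to pick the reset processor, assuming a diffusers release that exposes both classes (the helper is ours, not the repo's):

```python
import torch
from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0

def default_attn_processor():
    """Prefer the fused SDPA processor when torch >= 2.0 provides it."""
    if hasattr(torch.nn.functional, "scaled_dot_product_attention"):
        return AttnProcessor2_0()
    return AttnProcessor()

# usage: unet.set_attn_processor(processor=default_attn_processor())
```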