Spaces:

omer11a
/

bounded-attention

Running on Zero

App Files Files Community

omer11a committed on Apr 3, 2024

Commit

78b6f81

1 Parent(s): 1e9f321

Load model directly to GPU

Browse files

Files changed (2) hide show

app.py +7 -15
injection_utils.py +25 -3

app.py CHANGED Viewed

@@ -11,8 +11,6 @@ from injection_utils import regiter_attention_editor_diffusers
 from bounded_attention import BoundedAttention
 from pytorch_lightning import seed_everything
-from functools import partial
 MODEL_PATH = "stabilityai/stable-diffusion-xl-base-1.0"
 RESOLUTION = 256
 MIN_SIZE = 0.01
@@ -113,7 +111,6 @@ FOOTNOTE = """
 def inference(
-    model,
     boxes,
     prompts,
     subject_token_indices,
@@ -134,7 +131,10 @@ def inference(
         raise gr.Error("cuda is not available")
     device = torch.device("cuda")
-    model.to(device).half()
     seed_everything(seed)
     start_code = torch.randn([len(prompts), 4, 128, 128], device=device)
@@ -159,15 +159,11 @@ def inference(
     )
     register_attention_editor_diffusers(model, editor)
-    images = model(prompts, latents=start_code, guidance_scale=classifier_free_guidance_scale).images
-    unregister_attention_editor_diffusers(model)
-    model.double().to(torch.device("cpu"))
-    return images
 @spaces.GPU(duration=300)
 def generate(
-    model,
     prompt,
     subject_token_indices,
     filter_token_indices,
@@ -197,7 +193,7 @@ def generate(
     prompts = [prompt.strip(".").strip(",").strip()] * batch_size
     images = inference(
-        model, boxes, prompts, subject_token_indices, filter_token_indices, num_tokens, init_step_size,
         final_step_size, num_clusters_per_subject, cross_loss_scale, self_loss_scale, classifier_free_guidance_scale,
         num_iterations, loss_threshold, num_guidance_steps, seed)
@@ -253,10 +249,6 @@ def clear(batch_size):
 def main():
     nltk.download("averaged_perceptron_tagger")
-    scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
-    model = StableDiffusionXLPipeline.from_pretrained(MODEL_PATH, scheduler=scheduler)
-    model.unet.set_default_attn_processor()
-    model.enable_sequential_cpu_offload()
     with gr.Blocks(
             css=CSS,
@@ -328,7 +320,7 @@ def main():
             )
             generate_image_button.click(
-                fn=partial(generate, model),
                 inputs=[
                     prompt, subject_token_indices, filter_token_indices, num_tokens,
                     init_step_size, final_step_size, num_clusters_per_subject, cross_loss_scale, self_loss_scale,

 from bounded_attention import BoundedAttention
 from pytorch_lightning import seed_everything
 MODEL_PATH = "stabilityai/stable-diffusion-xl-base-1.0"
 RESOLUTION = 256
 MIN_SIZE = 0.01
 def inference(
     boxes,
     prompts,
     subject_token_indices,
         raise gr.Error("cuda is not available")
     device = torch.device("cuda")
+    scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
+    model = StableDiffusionXLPipeline.from_pretrained(MODEL_PATH, scheduler=scheduler, torch_dtype=torch.float16).to(device)
+    model.unet.set_default_attn_processor()
+    model.enable_sequential_cpu_offload()
     seed_everything(seed)
     start_code = torch.randn([len(prompts), 4, 128, 128], device=device)
     )
     register_attention_editor_diffusers(model, editor)
+    return model(prompts, latents=start_code, guidance_scale=classifier_free_guidance_scale).images
 @spaces.GPU(duration=300)
 def generate(
     prompt,
     subject_token_indices,
     filter_token_indices,
     prompts = [prompt.strip(".").strip(",").strip()] * batch_size
     images = inference(
+        boxes, prompts, subject_token_indices, filter_token_indices, num_tokens, init_step_size,
         final_step_size, num_clusters_per_subject, cross_loss_scale, self_loss_scale, classifier_free_guidance_scale,
         num_iterations, loss_threshold, num_guidance_steps, seed)
 def main():
     nltk.download("averaged_perceptron_tagger")
     with gr.Blocks(
             css=CSS,
             )
             generate_image_button.click(
+                fn=generate,
                 inputs=[
                     prompt, subject_token_indices, filter_token_indices, num_tokens,
                     init_step_size, final_step_size, num_clusters_per_subject, cross_loss_scale, self_loss_scale,

injection_utils.py CHANGED Viewed

@@ -53,7 +53,7 @@ class AttentionBase:
         self.cur_att_layer = 0
-def regiter_attention_editor_diffusers(model, editor: AttentionBase):
     """
     Register an attention editor on a Diffusers pipeline; adapted from [Prompt-to-Prompt]
     """
@@ -89,13 +89,14 @@ def regiter_attention_editor_diffusers(model, editor: AttentionBase):
         return forward
-    def register_editor(net, count, place_in_unet, prefix=''):
         for name, subnet in net.named_children():
             if net.__class__.__name__ == 'Attention':  # spatial Transformer layer
                 net.forward = ca_forward(net, place_in_unet)
                 return count + 1
             elif hasattr(net, 'children'):
-                count = register_editor(subnet, count, place_in_unet, prefix=prefix + '\t')
         return count
     cross_att_count = 0
@@ -110,3 +111,24 @@ def regiter_attention_editor_diffusers(model, editor: AttentionBase):
     editor.num_att_layers = cross_att_count
     editor.model = model
     model.editor = editor

         self.cur_att_layer = 0
+def register_attention_editor_diffusers(model, editor: AttentionBase):
     """
     Register an attention editor on a Diffusers pipeline; adapted from [Prompt-to-Prompt]
     """
         return forward
+    def register_editor(net, count, place_in_unet):
         for name, subnet in net.named_children():
             if net.__class__.__name__ == 'Attention':  # spatial Transformer layer
+                net.original_forward = net.forward
                 net.forward = ca_forward(net, place_in_unet)
                 return count + 1
             elif hasattr(net, 'children'):
+                count = register_editor(subnet, count, place_in_unet)
         return count
     cross_att_count = 0
     editor.num_att_layers = cross_att_count
     editor.model = model
     model.editor = editor
def unregister_attention_editor_diffusers(model):
    """
    Undo register_attention_editor_diffusers on a Diffusers pipeline.

    Walks the children of ``model.unet`` whose name contains "down", "mid"
    or "up", puts back the ``forward`` that was saved in ``original_forward``
    on every module whose class is named 'Attention', and then breaks the
    link between the pipeline and its editor in both directions.

    Calling this on a pipeline that has no editor attached, or calling it
    twice, is a no-op for the layers and links that are already clean.

    Args:
        model: pipeline object exposing ``unet`` and, if an editor was
            registered, ``editor``.
    """
    def unregister_editor(net):
        if net.__class__.__name__ == 'Attention':  # spatial Transformer layer
            # Restore exactly once per layer. Doing this inside a loop over
            # the layer's children would, on the second child, copy the
            # already-cleared ``original_forward`` (None) into ``forward``.
            saved_forward = getattr(net, 'original_forward', None)
            if saved_forward is not None:
                net.forward = saved_forward
                net.original_forward = None
            return
        for _, subnet in net.named_children():
            unregister_editor(subnet)

    for net_name, net in model.unet.named_children():
        if any(tag in net_name for tag in ("down", "mid", "up")):
            unregister_editor(net)

    # The editor is only reachable through the pipeline; there is no
    # free-standing ``editor`` name in this function's scope.
    editor = getattr(model, 'editor', None)
    if editor is not None:
        editor.model = None
    model.editor = None