Commit 14451ef (1 parent: 4159aad)
omer11a committed

Improved user interface

Subjects can now be described with short sub-prompts (e.g. "ginger kitten;gray puppy") instead of hand-counted token indices; explicit token indices remain available as an optional override and are derived from the sub-prompts when omitted.

Files changed (2):
  1. app.py +40 -13
  2. bounded_attention.py +39 -3
app.py CHANGED
@@ -20,6 +20,7 @@ WHITE = 255
 COLORS = ["red", "blue", "green", "orange", "purple", "turquoise", "olive"]
 
 PROMPT1 = "a ginger kitten and a gray puppy in a yard"
+SUBJECT_SUB_PROMPTS1 = "ginger kitten;gray puppy"
 SUBJECT_TOKEN_INDICES1 = "2,3;6,7"
 FILTER_TOKEN_INDICES1 = "1,4,5,8,9"
 NUM_TOKENS1 = "10"
@@ -158,6 +159,7 @@ FOOTNOTE = """
 def inference(
     boxes,
     prompts,
+    subject_sub_prompts,
     subject_token_indices,
     filter_token_indices,
     num_tokens,
@@ -190,9 +192,10 @@ def inference(
     editor = BoundedAttention(
         boxes,
         prompts,
-        subject_token_indices,
         list(range(70, 82)),
         list(range(70, 82)),
+        subject_sub_prompts=subject_sub_prompts,
+        subject_token_indices=subject_token_indices,
         filter_token_indices=filter_token_indices,
         eos_token_index=eos_token_index,
         cross_loss_coef=cross_loss_scale,
@@ -214,6 +217,7 @@ def inference(
 @spaces.GPU(duration=340)
 def generate(
     prompt,
+    subject_sub_prompts,
     subject_token_indices,
     filter_token_indices,
     num_tokens,
@@ -231,27 +235,45 @@ def generate(
     seed,
     boxes,
 ):
-    print('boxes in generate', boxes)
+    num_subjects = 0
+    subject_sub_prompts = convert_sub_prompts(subject_sub_prompts)
     subject_token_indices = convert_token_indices(subject_token_indices, nested=True)
-    if len(boxes) != len(subject_token_indices):
+    if subject_sub_prompts is not None:
+        num_subjects = len(subject_sub_prompts)
+    if subject_token_indices is not None:
+        num_subjects = len(subject_token_indices)
+
+    if len(boxes) != num_subjects:
         raise gr.Error("""
             The number of boxes should be equal to the number of subjects.
             Number of boxes drawn: {}, number of subjects: {}.
-        """.format(len(boxes), len(subject_token_indices)))
+        """.format(len(boxes), num_subjects))
 
     filter_token_indices = convert_token_indices(filter_token_indices) if len(filter_token_indices.strip()) > 0 else None
     num_tokens = int(num_tokens) if len(num_tokens.strip()) > 0 else None
     prompts = [prompt.strip(".").strip(",").strip()] * batch_size
 
     images = inference(
-        boxes, prompts, subject_token_indices, filter_token_indices, num_tokens, init_step_size,
+        boxes, prompts, subject_sub_prompts, subject_token_indices, filter_token_indices, num_tokens, init_step_size,
         final_step_size, first_refinement_step, num_clusters_per_subject, cross_loss_scale, self_loss_scale,
         classifier_free_guidance_scale, num_iterations, loss_threshold, num_guidance_steps, seed)
 
     return images
 
 
+def convert_sub_prompts(sub_prompts):
+    sub_prompts = sub_prompts.strip()
+    if len(sub_prompts) == 0:
+        return None
+
+    return [sub_prompt.strip() for sub_prompt in sub_prompts.split(";")]
+
+
 def convert_token_indices(token_indices, nested=False):
+    token_indices = token_indices.strip()
+    if len(token_indices) == 0:
+        return None
+
     if nested:
         return [convert_token_indices(indices, nested=False) for indices in token_indices.split(";")]
 
@@ -331,8 +353,13 @@ def main():
             placeholder=PROMPT1,
         )
 
+        subject_sub_prompts = gr.Textbox(
+            label="Sub-prompts for each subject (separate with semicolons)",
+            placeholder=SUBJECT_SUB_PROMPTS1,
+        )
+
         subject_token_indices = gr.Textbox(
-            label="The token indices of each subject (separate indices for the same subject with commas, and for different subjects with semicolons)",
+            label="Optional: The token indices of each subject (separate indices for the same subject with commas, and for different subjects with semicolons)",
             placeholder=SUBJECT_TOKEN_INDICES1,
         )
 
@@ -393,7 +420,7 @@ def main():
         generate_image_button.click(
             fn=generate,
             inputs=[
-                prompt, subject_token_indices, filter_token_indices, num_tokens,
+                prompt, subject_sub_prompts, subject_token_indices, filter_token_indices, num_tokens,
                 init_step_size, final_step_size, first_refinement_step, num_clusters_per_subject, cross_loss_scale, self_loss_scale,
                 classifier_free_guidance_scale, batch_size, num_iterations, loss_threshold, num_guidance_steps,
                 seed,
@@ -407,31 +434,31 @@ def main():
         gr.Examples(
             examples=[
                 [
-                    PROMPT1, SUBJECT_TOKEN_INDICES1, FILTER_TOKEN_INDICES1, NUM_TOKENS1,
+                    PROMPT1, SUBJECT_SUB_PROMPTS1, SUBJECT_TOKEN_INDICES1, FILTER_TOKEN_INDICES1, NUM_TOKENS1,
                     15, 10, 15, 3, 1, 1,
                     7.5, 1, 5, 0.2, 8,
                     12,
                 ],
                 [
-                    PROMPT2, "7,8,17;11,12,17;15,16,17", "5,6,9,10,13,14,18,19", "21",
+                    PROMPT2, "cute unicorn;pink hedgehog;nerdy owl", "7,8,17;11,12,17;15,16,17", "5,6,9,10,13,14,18,19", "21",
                     25, 18, 15, 3, 1, 1,
                     7.5, 1, 5, 0.2, 8,
                     286,
                 ],
                 [
-                    PROMPT3, "7;10;13,14;17", "5,6,8,9,11,12,15,16", "17",
+                    PROMPT3, "astronaut;robot;green alien;spaceship", "7;10;13,14;17", "5,6,8,9,11,12,15,16", "17",
                     18, 12, 15, 3, 1, 1,
                     7.5, 1, 5, 0.2, 8,
                     216,
                 ],
                 [
-                    PROMPT4, "9,10;13,14;17", "1,4,5,7,8,11,12,15,16", "17",
+                    PROMPT4, "semi trailer;concrete mixer;helicopter", "9,10;13,14;17", "1,4,5,7,8,11,12,15,16", "17",
                     25, 18, 15, 3, 1, 1,
                     7.5, 1, 5, 0.2, 8,
                     82,
                 ],
                 [
-                    PROMPT5, "2,3;6,7;10,11;14,15;18,19", "1,4,5,8,9,12,13,16,17,20,21", "22",
+                    PROMPT5, "golden retriever;german shepherd;boston terrier;english bulldog;border collie", "2,3;6,7;10,11;14,15;18,19", "1,4,5,8,9,12,13,16,17,20,21", "22",
                     18, 12, 15, 3, 1, 1,
                     7.5, 1, 5, 0.2, 8,
                     152,
@@ -439,7 +466,7 @@ def main():
             ],
             fn=build_example_layout,
             inputs=[
-                prompt, subject_token_indices, filter_token_indices, num_tokens,
+                prompt, subject_sub_prompts, subject_token_indices, filter_token_indices, num_tokens,
                 init_step_size, final_step_size, first_refinement_step, num_clusters_per_subject, cross_loss_scale, self_loss_scale,
                 classifier_free_guidance_scale, batch_size, num_iterations, loss_threshold, num_guidance_steps,
                 seed,
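The two parsing helpers introduced above are easy to sanity-check in isolation. The following is a minimal sketch with standalone copies of the functions; the non-nested branch of convert_token_indices is cut off by the diff, so the comma-splitting leaf case is an assumption:

# Standalone copies of the parsing helpers from this commit, runnable
# without launching the Gradio app.

def convert_sub_prompts(sub_prompts):
    # A blank textbox means the field was left empty -> None.
    sub_prompts = sub_prompts.strip()
    if len(sub_prompts) == 0:
        return None
    return [sub_prompt.strip() for sub_prompt in sub_prompts.split(";")]


def convert_token_indices(token_indices, nested=False):
    token_indices = token_indices.strip()
    if len(token_indices) == 0:
        return None
    if nested:
        return [convert_token_indices(indices, nested=False) for indices in token_indices.split(";")]
    # Leaf case (not shown in the diff): assumed to split on commas.
    return [int(index) for index in token_indices.split(",")]


print(convert_sub_prompts("ginger kitten;gray puppy"))  # ['ginger kitten', 'gray puppy']
print(convert_token_indices("2,3;6,7", nested=True))    # [[2, 3], [6, 7]]
print(convert_sub_prompts("   "))                       # None

Since both helpers return None for blank input, generate derives num_subjects from whichever field was filled in, with explicit token indices taking precedence when both are given.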
bounded_attention.py CHANGED
@@ -21,9 +21,10 @@ class BoundedAttention(injection_utils.AttentionBase):
         self,
         boxes,
         prompts,
-        subject_token_indices,
         cross_loss_layers,
         self_loss_layers,
+        subject_sub_prompts=None,
+        subject_token_indices=None,
         cross_mask_layers=None,
         self_mask_layers=None,
         eos_token_index=None,
@@ -56,6 +57,7 @@ class BoundedAttention(injection_utils.AttentionBase):
         super().__init__()
         self.boxes = boxes
         self.prompts = prompts
+        self.subject_sub_prompts = subject_sub_prompts
         self.subject_token_indices = subject_token_indices
         self.cross_loss_layers = set(cross_loss_layers)
         self.self_loss_layers = set(self_loss_layers)
@@ -186,8 +188,9 @@ class BoundedAttention(injection_utils.AttentionBase):
         self.optimized = False
         return latents
 
-    def _tokenize(self):
-        ids = self.model.tokenizer.encode(self.prompts[0])
+    def _tokenize(self, prompt=None):
+        prompt = self.prompts[0] if prompt is None else prompt
+        ids = self.model.tokenizer.encode(prompt)
         tokens = self.model.tokenizer.convert_ids_to_tokens(ids, skip_special_tokens=True)
         return [token[:-4] for token in tokens]  # remove ending </w>
 
@@ -195,6 +198,38 @@ class BoundedAttention(injection_utils.AttentionBase):
         tagged_tokens = nltk.pos_tag(self._tokenize())
         return [type(self).TAG_RULES.get(token, tag) for token, tag in tagged_tokens]
 
+    def _determine_subject_tokens(self):
+        if self.subject_token_indices is not None:
+            return
+
+        if self.subject_sub_prompts is None:
+            raise ValueError('Missing subject sub-prompts.')
+
+        tokens = self._tokenize()
+
+        matches = []
+        self.subject_token_indices = []
+        for sub_prompt in self.subject_sub_prompts:
+            token_indices = self._determine_specific_subject_tokens(tokens, sub_prompt, matches)
+            matches.append(token_indices[0])
+            self.subject_token_indices.append(token_indices)
+
+    def _determine_specific_subject_tokens(self, tokens, sub_prompt, previous_matches):
+        sub_tokens = self._tokenize(sub_prompt)
+        sub_len = len(sub_tokens)
+
+        matches = []
+        for i in range(len(tokens)):
+            if tokens[i] == sub_tokens[0] and tokens[i:i + sub_len] == sub_tokens:
+                matches.append(i + 1)
+
+        if len(matches) == 0:
+            raise ValueError(f'Couldn\'t locate sub-prompt: {sub_prompt}.')
+
+        new_matches = [i for i in matches if i not in previous_matches]
+        last_match = new_matches[0] if len(new_matches) > 0 else matches[-1]
+        return list(range(last_match, last_match + sub_len))
+
     def _determine_eos_token(self):
         tokens = self._tokenize()
         eos_token_index = len(tokens) + 1
@@ -224,6 +259,7 @@ class BoundedAttention(injection_utils.AttentionBase):
         self.leading_token_indices = leading_token_indices
 
     def _determine_tokens(self):
+        self._determine_subject_tokens()
         self._determine_eos_token()
         self._determine_filter_tokens()
         self._determine_leading_tokens()
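A note on the _tokenize change: accepting an optional prompt lets the same tokenization path serve both the full prompt and each sub-prompt. A quick sketch of the tokenization it relies on, assuming the stock CLIP tokenizer from transformers (the checkpoint name is illustrative; the Space actually uses self.model.tokenizer):

# Hedged sketch: reproduce _tokenize outside the pipeline. Assumes a CLIP
# tokenizer; "openai/clip-vit-large-patch14" is an illustrative checkpoint.
from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
ids = tokenizer.encode("a ginger kitten and a gray puppy in a yard")
tokens = tokenizer.convert_ids_to_tokens(ids, skip_special_tokens=True)
print(tokens)                            # word-final tokens carry a '</w>' suffix, e.g. 'kitten</w>'
print([token[:-4] for token in tokens])  # suffix stripped, as in _tokenize

Note that the [:-4] slice assumes every token ends in '</w>'; a word the tokenizer splits into several sub-word pieces would lose the last four characters of its non-final pieces, so sub-prompt matching is most reliable with common words.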
 
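The matching logic in _determine_specific_subject_tokens is self-contained enough to demonstrate without the diffusion model. A minimal sketch, with a whitespace tokenizer standing in for _tokenize (an assumption for readability; find_subject_tokens is a hypothetical standalone name). Indices are 1-based because position 0 belongs to the BOS token in the real tokenization:

def find_subject_tokens(tokens, sub_tokens, previous_matches):
    # Find every occurrence of the sub-prompt's token sequence in the prompt.
    sub_len = len(sub_tokens)
    matches = [i + 1 for i in range(len(tokens))
               if tokens[i:i + sub_len] == sub_tokens]
    if not matches:
        raise ValueError(f"Couldn't locate sub-prompt: {' '.join(sub_tokens)}.")
    # Prefer the first occurrence not already claimed by an earlier subject;
    # fall back to the last occurrence otherwise.
    new_matches = [i for i in matches if i not in previous_matches]
    start = new_matches[0] if new_matches else matches[-1]
    return list(range(start, start + sub_len))


prompt = "a ginger kitten and a gray puppy in a yard"
tokens = prompt.split()
claimed, subject_token_indices = [], []
for sub_prompt in ["ginger kitten", "gray puppy"]:
    indices = find_subject_tokens(tokens, sub_prompt.split(), claimed)
    claimed.append(indices[0])  # mirrors matches.append(token_indices[0]) above
    subject_token_indices.append(indices)

print(subject_token_indices)  # [[2, 3], [6, 7]] -- matches SUBJECT_TOKEN_INDICES1 = "2,3;6,7"

Recording each subject's start index and preferring unclaimed occurrences is what lets a sub-prompt that appears more than once in the prompt resolve each subject to a distinct occurrence.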