Commit 14451ef (1 parent: 4159aad)
omer11a committed

Improved user interface

Subjects can now be described with short sub-prompts (e.g. "ginger kitten;gray puppy") instead of hand-counted token indices; explicit token indices remain available as an optional override and are derived from the sub-prompts when omitted.

Files changed (2):
  1. app.py +40 -13
  2. bounded_attention.py +39 -3
app.py CHANGED
@@ -20,6 +20,7 @@ WHITE = 255
 COLORS = ["red", "blue", "green", "orange", "purple", "turquoise", "olive"]
 
 PROMPT1 = "a ginger kitten and a gray puppy in a yard"
+SUBJECT_SUB_PROMPTS1 = "ginger kitten;gray puppy"
 SUBJECT_TOKEN_INDICES1 = "2,3;6,7"
 FILTER_TOKEN_INDICES1 = "1,4,5,8,9"
 NUM_TOKENS1 = "10"
@@ -158,6 +159,7 @@ FOOTNOTE = """
 def inference(
     boxes,
     prompts,
+    subject_sub_prompts,
     subject_token_indices,
     filter_token_indices,
     num_tokens,
@@ -190,9 +192,10 @@ def inference(
     editor = BoundedAttention(
         boxes,
         prompts,
-        subject_token_indices,
         list(range(70, 82)),
         list(range(70, 82)),
+        subject_sub_prompts=subject_sub_prompts,
+        subject_token_indices=subject_token_indices,
         filter_token_indices=filter_token_indices,
         eos_token_index=eos_token_index,
         cross_loss_coef=cross_loss_scale,
@@ -214,6 +217,7 @@ def inference(
 @spaces.GPU(duration=340)
 def generate(
     prompt,
+    subject_sub_prompts,
     subject_token_indices,
     filter_token_indices,
     num_tokens,
@@ -231,27 +235,45 @@ def generate(
     seed,
     boxes,
 ):
-    print('boxes in generate', boxes)
+    num_subjects = 0
+    subject_sub_prompts = convert_sub_prompts(subject_sub_prompts)
     subject_token_indices = convert_token_indices(subject_token_indices, nested=True)
-    if len(boxes) != len(subject_token_indices):
+    if subject_sub_prompts is not None:
+        num_subjects = len(subject_sub_prompts)
+    if subject_token_indices is not None:
+        num_subjects = len(subject_token_indices)
+
+    if len(boxes) != num_subjects:
         raise gr.Error("""
             The number of boxes should be equal to the number of subjects.
             Number of boxes drawn: {}, number of subjects: {}.
-        """.format(len(boxes), len(subject_token_indices)))
+        """.format(len(boxes), num_subjects))
 
     filter_token_indices = convert_token_indices(filter_token_indices) if len(filter_token_indices.strip()) > 0 else None
     num_tokens = int(num_tokens) if len(num_tokens.strip()) > 0 else None
     prompts = [prompt.strip(".").strip(",").strip()] * batch_size
 
     images = inference(
-        boxes, prompts, subject_token_indices, filter_token_indices, num_tokens, init_step_size,
+        boxes, prompts, subject_sub_prompts, subject_token_indices, filter_token_indices, num_tokens, init_step_size,
         final_step_size, first_refinement_step, num_clusters_per_subject, cross_loss_scale, self_loss_scale,
         classifier_free_guidance_scale, num_iterations, loss_threshold, num_guidance_steps, seed)
 
     return images
 
 
+def convert_sub_prompts(sub_prompts):
+    sub_prompts = sub_prompts.strip()
+    if len(sub_prompts) == 0:
+        return None
+
+    return [sub_prompt.strip() for sub_prompt in sub_prompts.split(";")]
+
+
 def convert_token_indices(token_indices, nested=False):
+    token_indices = token_indices.strip()
+    if len(token_indices) == 0:
+        return None
+
     if nested:
         return [convert_token_indices(indices, nested=False) for indices in token_indices.split(";")]
 
@@ -331,8 +353,13 @@ def main():
             placeholder=PROMPT1,
         )
 
+        subject_sub_prompts = gr.Textbox(
+            label="Sub-prompts for each subject (separate with semicolons)",
+            placeholder=SUBJECT_SUB_PROMPTS1,
+        )
+
         subject_token_indices = gr.Textbox(
-            label="The token indices of each subject (separate indices for the same subject with commas, and for different subjects with semicolons)",
+            label="Optional: The token indices of each subject (separate indices for the same subject with commas, and for different subjects with semicolons)",
             placeholder=SUBJECT_TOKEN_INDICES1,
         )
 
@@ -393,7 +420,7 @@ def main():
         generate_image_button.click(
             fn=generate,
             inputs=[
-                prompt, subject_token_indices, filter_token_indices, num_tokens,
+                prompt, subject_sub_prompts, subject_token_indices, filter_token_indices, num_tokens,
                 init_step_size, final_step_size, first_refinement_step, num_clusters_per_subject, cross_loss_scale, self_loss_scale,
                 classifier_free_guidance_scale, batch_size, num_iterations, loss_threshold, num_guidance_steps,
                 seed,
@@ -407,31 +434,31 @@ def main():
         gr.Examples(
             examples=[
                 [
-                    PROMPT1, SUBJECT_TOKEN_INDICES1, FILTER_TOKEN_INDICES1, NUM_TOKENS1,
+                    PROMPT1, SUBJECT_SUB_PROMPTS1, SUBJECT_TOKEN_INDICES1, FILTER_TOKEN_INDICES1, NUM_TOKENS1,
                     15, 10, 15, 3, 1, 1,
                     7.5, 1, 5, 0.2, 8,
                     12,
                 ],
                 [
-                    PROMPT2, "7,8,17;11,12,17;15,16,17", "5,6,9,10,13,14,18,19", "21",
+                    PROMPT2, "cute unicorn;pink hedgehog;nerdy owl", "7,8,17;11,12,17;15,16,17", "5,6,9,10,13,14,18,19", "21",
                     25, 18, 15, 3, 1, 1,
                     7.5, 1, 5, 0.2, 8,
                     286,
                 ],
                 [
-                    PROMPT3, "7;10;13,14;17", "5,6,8,9,11,12,15,16", "17",
+                    PROMPT3, "astronaut;robot;green alien;spaceship", "7;10;13,14;17", "5,6,8,9,11,12,15,16", "17",
                     18, 12, 15, 3, 1, 1,
                     7.5, 1, 5, 0.2, 8,
                     216,
                 ],
                 [
-                    PROMPT4, "9,10;13,14;17", "1,4,5,7,8,11,12,15,16", "17",
+                    PROMPT4, "semi trailer;concrete mixer;helicopter", "9,10;13,14;17", "1,4,5,7,8,11,12,15,16", "17",
                     25, 18, 15, 3, 1, 1,
                     7.5, 1, 5, 0.2, 8,
                     82,
                 ],
                 [
-                    PROMPT5, "2,3;6,7;10,11;14,15;18,19", "1,4,5,8,9,12,13,16,17,20,21", "22",
+                    PROMPT5, "golden retriever;german shepherd;boston terrier;english bulldog;border collie", "2,3;6,7;10,11;14,15;18,19", "1,4,5,8,9,12,13,16,17,20,21", "22",
                     18, 12, 15, 3, 1, 1,
                     7.5, 1, 5, 0.2, 8,
                     152,
@@ -439,7 +466,7 @@ def main():
             ],
             fn=build_example_layout,
             inputs=[
-                prompt, subject_token_indices, filter_token_indices, num_tokens,
+                prompt, subject_sub_prompts, subject_token_indices, filter_token_indices, num_tokens,
                 init_step_size, final_step_size, first_refinement_step, num_clusters_per_subject, cross_loss_scale, self_loss_scale,
                 classifier_free_guidance_scale, batch_size, num_iterations, loss_threshold, num_guidance_steps,
                 seed,
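The two parsing helpers introduced above are easy to sanity-check in isolation. The following is a minimal sketch with standalone copies of the functions; the non-nested branch of convert_token_indices is cut off by the diff, so the comma-splitting leaf case is an assumption:

# Standalone copies of the parsing helpers from this commit, runnable
# without launching the Gradio app.

def convert_sub_prompts(sub_prompts):
    # A blank textbox means the field was left empty -> None.
    sub_prompts = sub_prompts.strip()
    if len(sub_prompts) == 0:
        return None
    return [sub_prompt.strip() for sub_prompt in sub_prompts.split(";")]


def convert_token_indices(token_indices, nested=False):
    token_indices = token_indices.strip()
    if len(token_indices) == 0:
        return None
    if nested:
        return [convert_token_indices(indices, nested=False) for indices in token_indices.split(";")]
    # Leaf case (not shown in the diff): assumed to split on commas.
    return [int(index) for index in token_indices.split(",")]


print(convert_sub_prompts("ginger kitten;gray puppy"))  # ['ginger kitten', 'gray puppy']
print(convert_token_indices("2,3;6,7", nested=True))    # [[2, 3], [6, 7]]
print(convert_sub_prompts("   "))                       # None

Since both helpers return None for blank input, generate derives num_subjects from whichever field was filled in, with explicit token indices taking precedence when both are given.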
bounded_attention.py CHANGED
@@ -21,9 +21,10 @@ class BoundedAttention(injection_utils.AttentionBase):
         self,
         boxes,
         prompts,
-        subject_token_indices,
         cross_loss_layers,
         self_loss_layers,
+        subject_sub_prompts=None,
+        subject_token_indices=None,
         cross_mask_layers=None,
         self_mask_layers=None,
         eos_token_index=None,
@@ -56,6 +57,7 @@ class BoundedAttention(injection_utils.AttentionBase):
         super().__init__()
         self.boxes = boxes
         self.prompts = prompts
+        self.subject_sub_prompts = subject_sub_prompts
         self.subject_token_indices = subject_token_indices
         self.cross_loss_layers = set(cross_loss_layers)
         self.self_loss_layers = set(self_loss_layers)
@@ -186,8 +188,9 @@ class BoundedAttention(injection_utils.AttentionBase):
         self.optimized = False
         return latents
 
-    def _tokenize(self):
-        ids = self.model.tokenizer.encode(self.prompts[0])
+    def _tokenize(self, prompt=None):
+        prompt = self.prompts[0] if prompt is None else prompt
+        ids = self.model.tokenizer.encode(prompt)
         tokens = self.model.tokenizer.convert_ids_to_tokens(ids, skip_special_tokens=True)
         return [token[:-4] for token in tokens]  # remove ending </w>
 
@@ -195,6 +198,38 @@ class BoundedAttention(injection_utils.AttentionBase):
         tagged_tokens = nltk.pos_tag(self._tokenize())
         return [type(self).TAG_RULES.get(token, tag) for token, tag in tagged_tokens]
 
+    def _determine_subject_tokens(self):
+        if self.subject_token_indices is not None:
+            return
+
+        if self.subject_sub_prompts is None:
+            raise ValueError('Missing subject sub-prompts.')
+
+        tokens = self._tokenize()
+
+        matches = []
+        self.subject_token_indices = []
+        for sub_prompt in self.subject_sub_prompts:
+            token_indices = self._determine_specific_subject_tokens(tokens, sub_prompt, matches)
+            matches.append(token_indices[0])
+            self.subject_token_indices.append(token_indices)
+
+    def _determine_specific_subject_tokens(self, tokens, sub_prompt, previous_matches):
+        sub_tokens = self._tokenize(sub_prompt)
+        sub_len = len(sub_tokens)
+
+        matches = []
+        for i in range(len(tokens)):
+            if tokens[i] == sub_tokens[0] and tokens[i:i + sub_len] == sub_tokens:
+                matches.append(i + 1)
+
+        if len(matches) == 0:
+            raise ValueError(f'Couldn\'t locate sub-prompt: {sub_prompt}.')
+
+        new_matches = [i for i in matches if i not in previous_matches]
+        last_match = new_matches[0] if len(new_matches) > 0 else matches[-1]
+        return list(range(last_match, last_match + sub_len))
+
     def _determine_eos_token(self):
         tokens = self._tokenize()
         eos_token_index = len(tokens) + 1
@@ -224,6 +259,7 @@ class BoundedAttention(injection_utils.AttentionBase):
         self.leading_token_indices = leading_token_indices
 
     def _determine_tokens(self):
+        self._determine_subject_tokens()
         self._determine_eos_token()
         self._determine_filter_tokens()
         self._determine_leading_tokens()
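A note on the _tokenize change: accepting an optional prompt lets the same tokenization path serve both the full prompt and each sub-prompt. A quick sketch of the tokenization it relies on, assuming the stock CLIP tokenizer from transformers (the checkpoint name is illustrative; the Space actually uses self.model.tokenizer):

# Hedged sketch: reproduce _tokenize outside the pipeline. Assumes a CLIP
# tokenizer; "openai/clip-vit-large-patch14" is an illustrative checkpoint.
from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
ids = tokenizer.encode("a ginger kitten and a gray puppy in a yard")
tokens = tokenizer.convert_ids_to_tokens(ids, skip_special_tokens=True)
print(tokens)                            # word-final tokens carry a '</w>' suffix, e.g. 'kitten</w>'
print([token[:-4] for token in tokens])  # suffix stripped, as in _tokenize

Note that the [:-4] slice assumes every token ends in '</w>'; a word the tokenizer splits into several sub-word pieces would lose the last four characters of its non-final pieces, so sub-prompt matching is most reliable with common words.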
 
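The matching logic in _determine_specific_subject_tokens is self-contained enough to demonstrate without the diffusion model. A minimal sketch, with a whitespace tokenizer standing in for _tokenize (an assumption for readability; find_subject_tokens is a hypothetical standalone name). Indices are 1-based because position 0 belongs to the BOS token in the real tokenization:

def find_subject_tokens(tokens, sub_tokens, previous_matches):
    # Find every occurrence of the sub-prompt's token sequence in the prompt.
    sub_len = len(sub_tokens)
    matches = [i + 1 for i in range(len(tokens))
               if tokens[i:i + sub_len] == sub_tokens]
    if not matches:
        raise ValueError(f"Couldn't locate sub-prompt: {' '.join(sub_tokens)}.")
    # Prefer the first occurrence not already claimed by an earlier subject;
    # fall back to the last occurrence otherwise.
    new_matches = [i for i in matches if i not in previous_matches]
    start = new_matches[0] if new_matches else matches[-1]
    return list(range(start, start + sub_len))


prompt = "a ginger kitten and a gray puppy in a yard"
tokens = prompt.split()
claimed, subject_token_indices = [], []
for sub_prompt in ["ginger kitten", "gray puppy"]:
    indices = find_subject_tokens(tokens, sub_prompt.split(), claimed)
    claimed.append(indices[0])  # mirrors matches.append(token_indices[0]) above
    subject_token_indices.append(indices)

print(subject_token_indices)  # [[2, 3], [6, 7]] -- matches SUBJECT_TOKEN_INDICES1 = "2,3;6,7"

Recording each subject's start index and preferring unclaimed occurrences is what lets a sub-prompt that appears more than once in the prompt resolve each subject to a distinct occurrence.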