JingyeChen committed
Commit
78551e3
1 Parent(s): a6470eb
Files changed (1)
  1. app.py +38 -12
app.py CHANGED
@@ -23,7 +23,7 @@ if not os.path.exists('images2'):
     with zipfile.ZipFile('images2.zip', 'r') as zip_ref:
         zip_ref.extractall('.')
 
-os.system('nvidia-smi')
+# os.system('nvidia-smi')
 os.system('ls')
 
 #### import m1
@@ -186,6 +186,27 @@ def get_pixels(i, t, evt: gr.SelectData):
     return image
 
 
+font_layout = ImageFont.truetype('./Arial.ttf', 16)
+
+def get_layout_image(ocrs):
+
+    blank = Image.new('RGB', (256,256), (0,0,0))
+    draw = ImageDraw.ImageDraw(blank)
+
+    for line in ocrs.split('\n'):
+        line = line.strip()
+
+        if len(line) == 0:
+            break
+
+        pred = ' '.join(line.split()[:-1])
+        box = line.split()[-1]
+        l, t, r, b = [int(i)*2 for i in box.split(',')]  # the size of the canvas is 256x256
+        draw.rectangle([(l, t), (r, b)], outline="red")
+        draw.text((l, t), pred, font=font_layout)
+
+    return blank
+
 
 
 def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural):
@@ -204,6 +225,7 @@ def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural):
         user_prompt = f'{user_prompt}'
         composed_prompt = user_prompt
         prompt = tokenizer.encode(user_prompt)
+        layout_image = None
     else:
         if len(stack) == 0:
 
@@ -245,6 +267,8 @@ def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural):
             # user_prompt = prompt
             current_ocr = ocrs
 
+            layout_image = get_layout_image(ocrs)
+
             ocr_ids = []
             print('user_prompt', user_prompt)
             print('current_ocr', current_ocr)
@@ -284,7 +308,7 @@ def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural):
 
         else:
             user_prompt += ' <|endoftext|>'
-
+            layout_image = None
 
         for items in stack:
             position, text = items
@@ -358,10 +382,10 @@ def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural):
             row = index // 2
             col = index % 2
             new_image.paste(image, (col*width, row*height))
-        os.system('nvidia-smi')
+        # os.system('nvidia-smi')
         torch.cuda.empty_cache()
-        os.system('nvidia-smi')
-        return tuple(results), composed_prompt
+        # os.system('nvidia-smi')
+        return tuple(results), composed_prompt, layout_image
 
     elif radio == 'TextDiffuser-2-LCM':
         generator = torch.Generator(device=pipe.device).manual_seed(random.randint(0,1000))
@@ -373,10 +397,10 @@ def text_to_image(prompt,keywords,positive_prompt,radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural):
             guidance_scale=1,
             # num_images_per_prompt=slider_batch,
         ).images
-        os.system('nvidia-smi')
+        # os.system('nvidia-smi')
         torch.cuda.empty_cache()
-        os.system('nvidia-smi')
-        return tuple(image), composed_prompt
+        # os.system('nvidia-smi')
+        return tuple(image), composed_prompt, layout_image
 
 with gr.Blocks() as demo:
 
@@ -428,7 +452,7 @@ with gr.Blocks() as demo:
                 t = gr.Textbox(label="Keyword", value='input_keyword')
                 redo = gr.Button(value='Redo - Cancel the last keyword')
                 undo = gr.Button(value='Undo - Clear the canvas')
-                skip_button = gr.Button(value='Skip - Operate next keyword')
+                skip_button = gr.Button(value='Skip - Operate the next keyword')
 
                 i.select(get_pixels,[i,t],[i])
                 redo.click(exe_redo, [i,t],[i])
@@ -439,8 +463,8 @@ with gr.Blocks() as demo:
         slider_natural = gr.Checkbox(label="Natural image generation", value=False, info="The text position and content info will not be incorporated.")
        slider_step = gr.Slider(minimum=1, maximum=50, value=20, step=1, label="Sampling step", info="The sampling step for TextDiffuser-2. You may decrease the step to 4 when using LCM.")
        slider_guidance = gr.Slider(minimum=1, maximum=13, value=7.5, step=0.5, label="Scale of classifier-free guidance", info="The scale of classifier-free guidance, set to 7.5 by default. When using LCM, cfg is set to 1.")
-        slider_batch = gr.Slider(minimum=1, maximum=6, value=4, step=1, label="Batch size", info="The number of images to be sampled.")
-        slider_temperature = gr.Slider(minimum=0.1, maximum=2, value=0.7, step=0.1, label="Temperature", info="Control the diversity of the layout planner. A higher value indicates more diversity.")
+        slider_batch = gr.Slider(minimum=1, maximum=4, value=4, step=1, label="Batch size", info="The number of images to be sampled.")
+        slider_temperature = gr.Slider(minimum=0.1, maximum=2, value=1.4, step=0.1, label="Temperature", info="Control the diversity of the layout planner. A higher value indicates more diversity.")
         # slider_seed = gr.Slider(minimum=1, maximum=10000, label="Seed", randomize=True)
         button = gr.Button("Generate")
 
@@ -450,8 +474,10 @@ with gr.Blocks() as demo:
         with gr.Accordion("Intermediate results", open=False):
             gr.Markdown("Composed prompt")
             composed_prompt = gr.Textbox(label='')
+            layout = gr.Image()
+
 
-    button.click(text_to_image, inputs=[prompt,keywords,positive_prompt, radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural], outputs=[output, composed_prompt])
+    button.click(text_to_image, inputs=[prompt,keywords,positive_prompt, radio,slider_step,slider_guidance,slider_batch,slider_temperature,slider_natural], outputs=[output, composed_prompt, layout])
 
     gr.Markdown("## Prompt Examples")
     gr.Examples(
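
For reference, the layout-preview logic this commit adds can be exercised on its own. A minimal self-contained sketch follows; the font fallback and the sample layout string are assumptions for illustration, not part of the commit:

# preview_layout.py -- standalone sketch of the new get_layout_image helper
from PIL import Image, ImageDraw, ImageFont

try:
    font_layout = ImageFont.truetype('./Arial.ttf', 16)   # the Space ships this font
except OSError:
    font_layout = ImageFont.load_default()                # assumed fallback for local runs

def get_layout_image(ocrs):
    # Each line is '<keyword(s)> l,t,r,b'; doubling the coordinates maps
    # them onto the 256x256 canvas (so the planner's boxes are on a
    # 128x128 grid, consistent with the *2 in the committed code).
    blank = Image.new('RGB', (256, 256), (0, 0, 0))
    draw = ImageDraw.Draw(blank)
    for line in ocrs.split('\n'):
        line = line.strip()
        if len(line) == 0:
            break
        pred = ' '.join(line.split()[:-1])     # the keyword text
        box = line.split()[-1]                 # the 'l,t,r,b' token
        l, t, r, b = [int(i) * 2 for i in box.split(',')]
        draw.rectangle([(l, t), (r, b)], outline='red')
        draw.text((l, t), pred, font=font_layout)
    return blank

# Hypothetical planner output: two keywords with their bounding boxes.
get_layout_image('hello 10,10,60,26\nworld 20,40,90,56').save('layout_preview.png')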
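The layout_image = None assignments and the extra outputs=[..., layout] entry fit together: a Gradio event handler may return None for an output component, which simply leaves that component empty. A toy sketch of the same three-output wiring, assuming nothing beyond stock gradio and Pillow (fake_generate is a stand-in, not the app's generation code):

import gradio as gr
from PIL import Image

def fake_generate(prompt, natural):
    image = Image.new('RGB', (64, 64), (200, 200, 200))
    # On the natural-image path no layout is planned, so the third
    # return value is None and the layout gr.Image stays blank.
    layout = None if natural else Image.new('RGB', (64, 64), (0, 0, 0))
    return image, prompt, layout

with gr.Blocks() as demo:
    prompt = gr.Textbox(label='Prompt')
    natural = gr.Checkbox(label='Natural image generation')
    button = gr.Button('Generate')
    output = gr.Image()
    composed_prompt = gr.Textbox(label='Composed prompt')
    layout = gr.Image()
    button.click(fake_generate, inputs=[prompt, natural],
                 outputs=[output, composed_prompt, layout])

demo.launch()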