rodrigomasini committed on
Commit
76a8aa4
1 Parent(s): 3488ac9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -80
app.py CHANGED
@@ -148,15 +148,8 @@ def get_random_bool():
148
  return random.choice([True, False])
149
 
150
  def add_white_border(input_image, border_width=10):
151
- """
152
- 为PIL图像添加指定宽度的白色边框。
153
-
154
- :param input_image: PIL图像对象
155
- :param border_width: 边框宽度(单位:像素)
156
- :return: 带有白色边框的PIL图像对象
157
- """
158
- border_color = 'white' # 白色边框
159
- # 添加边框
160
  img_with_border = ImageOps.expand(input_image, border=border_width, fill=border_color)
161
  return img_with_border
162
 
@@ -406,26 +399,20 @@ VISION_CONFIG_DICT = {
406
  }
407
 
408
  def concat_images_vertically_and_scale(images,scale_factor=2):
409
- # 加载所有图像
410
- # 确保所有图像的宽度一致
411
  widths = [img.width for img in images]
412
  if not all(width == widths[0] for width in widths):
413
  raise ValueError('All images must have the same width.')
414
-
415
- # 计算总高度
416
  total_height = sum(img.height for img in images)
417
 
418
- # 创建新的图像,宽度与原图相同,高度为所有图像高度之和
419
  max_width = max(widths)
420
  concatenated_image = Image.new('RGB', (max_width, total_height))
421
 
422
- # 竖直拼接图像
423
  current_height = 0
424
  for img in images:
425
  concatenated_image.paste(img, (0, current_height))
426
  current_height += img.height
427
 
428
- # 缩放图像为1/n高度
429
  new_height = concatenated_image.height // scale_factor
430
  new_width = concatenated_image.width // scale_factor
431
  resized_image = concatenated_image.resize((new_width, new_height), Image.ANTIALIAS)
@@ -434,19 +421,13 @@ def concat_images_vertically_and_scale(images,scale_factor=2):
434
 
435
 
436
  def combine_images_horizontally(images):
437
- # 读取所有图片并存入列表
438
-
439
- # 获取每幅图像的宽度和高度
440
  widths, heights = zip(*(i.size for i in images))
441
 
442
- # 计算总宽度和最大高度
443
  total_width = sum(widths)
444
  max_height = max(heights)
445
 
446
- # 创建新的空白图片,用于拼接
447
  new_im = Image.new('RGB', (total_width, max_height))
448
 
449
- # 将图片横向拼接
450
  x_offset = 0
451
  for im in images:
452
  new_im.paste(im, (x_offset, 0))
@@ -456,28 +437,20 @@ def combine_images_horizontally(images):
456
 
457
  def combine_images_vertically_with_resize(images):
458
 
459
- # 获取所有图片的宽度和高度
460
  widths, heights = zip(*(i.size for i in images))
461
 
462
- # 确定新图片的宽度,即所有图片中最小的宽度
463
  min_width = min(widths)
464
 
465
- # 调整图片尺寸以保持宽度一致,长宽比不变
466
  resized_images = []
467
  for img in images:
468
- # 计算新高度保持图片长宽比
469
  new_height = int(min_width * img.height / img.width)
470
- # 调整图片大小
471
  resized_img = img.resize((min_width, new_height), Image.ANTIALIAS)
472
  resized_images.append(resized_img)
473
 
474
- # 计算所有调整尺寸后图片的总高度
475
  total_height = sum(img.height for img in resized_images)
476
 
477
- # 创建一个足够宽和高的新图片对象
478
  new_im = Image.new('RGB', (min_width, total_height))
479
 
480
- # 竖直拼接图片
481
  y_offset = 0
482
  for im in resized_images:
483
  new_im.paste(im, (0, y_offset))
@@ -516,10 +489,7 @@ def distribute_images(images, group_sizes=(4, 3, 2)):
516
  remaining = len(images)
517
 
518
  while remaining > 0:
519
- # 优先分配最大组(4张图片),再考虑3张,最后处理2张
520
  for size in sorted(group_sizes, reverse=True):
521
- # 如果剩下的图片数量大于等于当前组大小,或者为图片总数时(也就是第一次迭代)
522
- # 开始创建新组
523
  if remaining >= size or remaining == len(images):
524
  if remaining > size:
525
  new_group = images[-remaining: -remaining + size]
@@ -528,7 +498,6 @@ def distribute_images(images, group_sizes=(4, 3, 2)):
528
  groups.append(new_group)
529
  remaining -= size
530
  break
531
- # 如果剩下的图片少于最小的组大小(2张)并且已经有组了,就把剩下的图片加到最后一个组
532
  elif remaining < min(group_sizes) and groups:
533
  groups[-1].extend(images[-remaining:])
534
  remaining = 0
@@ -637,35 +606,15 @@ style_list = [
637
  "prompt": "{prompt}",
638
  "negative_prompt": "",
639
  },
640
- {
641
- "name": "Japanese Anime",
642
- "prompt": "anime artwork illustrating {prompt}. created by japanese anime studio. highly emotional. best quality, high resolution",
643
- "negative_prompt": "low quality, low resolution"
644
- },
645
  {
646
  "name": "Cinematic",
647
  "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
648
  "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
649
  },
650
- {
651
- "name": "Disney Charactor",
652
- "prompt": "A Pixar animation character of {prompt} . pixar-style, studio anime, Disney, high-quality",
653
- "negative_prompt": "lowres, bad anatomy, bad hands, text, bad eyes, bad arms, bad legs, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, blurry, grayscale, noisy, sloppy, messy, grainy, highly detailed, ultra textured, photo",
654
- },
655
  {
656
  "name": "Photographic",
657
  "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
658
  "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
659
- },
660
- {
661
- "name": "Comic book",
662
- "prompt": "comic {prompt} . graphic illustration, comic art, graphic novel art, vibrant, highly detailed",
663
- "negative_prompt": "photograph, deformed, glitch, noisy, realistic, stock photo",
664
- },
665
- {
666
- "name": "Line art",
667
- "prompt": "line art drawing {prompt} . professional, sleek, modern, minimalist, graphic, line art, vector graphics",
668
- "negative_prompt": "anime, photorealistic, 35mm film, deformed, glitch, blurry, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, mutated, realism, realistic, impressionism, expressionism, oil, acrylic",
669
  }
670
  ]
671
 
@@ -675,7 +624,7 @@ image_encoder_path = "./data/models/ip_adapter/sdxl_models/image_encoder"
675
  ip_ckpt = "./data/models/ip_adapter/sdxl_models/ip-adapter_sdxl_vit-h.bin"
676
  os.environ["no_proxy"] = "localhost,127.0.0.1,::1"
677
  STYLE_NAMES = list(styles.keys())
678
- DEFAULT_STYLE_NAME = "Japanese Anime"
679
  global models_dict
680
  use_va = True
681
  models_dict = {
@@ -748,7 +697,6 @@ class SpatialAttnProcessor2_0(torch.nn.Module):
748
  self.id_bank[cur_step] = [hidden_states[:self.id_length], hidden_states[self.id_length:]]
749
  else:
750
  encoder_hidden_states = torch.cat((self.id_bank[cur_step][0].to(self.device),hidden_states[:1],self.id_bank[cur_step][1].to(self.device),hidden_states[1:]))
751
- # 判断随机数是否大于0.5
752
  if cur_step <=1:
753
  hidden_states = self.__call2__(attn, hidden_states,None,attention_mask,temb)
754
  else: # 256 1024 4096
@@ -1001,39 +949,16 @@ css = '''
1001
 
1002
  #################################################
1003
  title = r"""
1004
- <h1 align="center">StoryDiffusion: Consistent Self-Attention for Long-Range Image and Video Generation</h1>
1005
  """
1006
 
1007
  description = r"""
1008
- <b>Official 🤗 Gradio demo</b> for <a href='https://github.com/HVision-NKU/StoryDiffusion' target='_blank'><b>StoryDiffusion: Consistent Self-Attention for Long-Range Image and Video Generation</b></a>.<br>
1009
- ❗️❗️❗️[<b>Important</b>] Personalization steps:<br>
1010
  1️⃣ Enter a Textual Description for Character, if you add the Ref-Image, making sure to <b>follow the class word</b> you want to customize with the <b>trigger word</b>: `img`, such as: `man img` or `woman img` or `girl img`.<br>
1011
  2️⃣ Enter the prompt array, each line corrsponds to one generated image.<br>
1012
  3️⃣ Choose your preferred style template.<br>
1013
  4️⃣ Click the <b>Submit</b> button to start customizing.
1014
  """
1015
 
1016
- article = r"""
1017
- If StoryDiffusion is helpful, please help to ⭐ the <a href='https://github.com/HVision-NKU/StoryDiffusion' target='_blank'>Github Repo</a>. Thanks!
1018
- [![GitHub Stars](https://img.shields.io/github/stars/HVision-NKU/StoryDiffusion?style=social)](https://github.com/HVision-NKU/StoryDiffusion)
1019
- ---
1020
- 📝 **Citation**
1021
- <br>
1022
- If our work is useful for your research, please consider citing:
1023
- ```bibtex
1024
- @article{Zhou2024storydiffusion,
1025
- title={StoryDiffusion: Consistent Self-Attention for Long-Range Image and Video Generation},
1026
- author={Zhou, Yupeng and Zhou, Daquan and Cheng, Ming-Ming and Feng, Jiashi and Hou, Qibin},
1027
- year={2024}
1028
- }
1029
- ```
1030
- 📋 **License**
1031
- <br>
1032
- The Contents you create are under Apache-2.0 LICENSE. The Code are under Attribution-NonCommercial 4.0 International.
1033
- 📧 **Contact**
1034
- <br>
1035
- If you have any questions, please feel free to reach me out at <b>ypzhousdu@gmail.com</b>.
1036
- """
1037
  version = r"""
1038
  <h3 align="center">StoryDiffusion Version 0.01 (test version)</h3>
1039
  <h5 >1. Support image ref image. (Cartoon Ref image is not support now)</h5>
 
148
  return random.choice([True, False])
149
 
150
  def add_white_border(input_image, border_width=10):
151
+
152
+ border_color = 'white'
 
 
 
 
 
 
 
153
  img_with_border = ImageOps.expand(input_image, border=border_width, fill=border_color)
154
  return img_with_border
155
 
 
399
  }
400
 
401
  def concat_images_vertically_and_scale(images,scale_factor=2):
 
 
402
  widths = [img.width for img in images]
403
  if not all(width == widths[0] for width in widths):
404
  raise ValueError('All images must have the same width.')
405
+
 
406
  total_height = sum(img.height for img in images)
407
 
 
408
  max_width = max(widths)
409
  concatenated_image = Image.new('RGB', (max_width, total_height))
410
 
 
411
  current_height = 0
412
  for img in images:
413
  concatenated_image.paste(img, (0, current_height))
414
  current_height += img.height
415
 
 
416
  new_height = concatenated_image.height // scale_factor
417
  new_width = concatenated_image.width // scale_factor
418
  resized_image = concatenated_image.resize((new_width, new_height), Image.ANTIALIAS)
 
421
 
422
 
423
  def combine_images_horizontally(images):
 
 
 
424
  widths, heights = zip(*(i.size for i in images))
425
 
 
426
  total_width = sum(widths)
427
  max_height = max(heights)
428
 
 
429
  new_im = Image.new('RGB', (total_width, max_height))
430
 
 
431
  x_offset = 0
432
  for im in images:
433
  new_im.paste(im, (x_offset, 0))
 
437
 
438
  def combine_images_vertically_with_resize(images):
439
 
 
440
  widths, heights = zip(*(i.size for i in images))
441
 
 
442
  min_width = min(widths)
443
 
 
444
  resized_images = []
445
  for img in images:
 
446
  new_height = int(min_width * img.height / img.width)
 
447
  resized_img = img.resize((min_width, new_height), Image.ANTIALIAS)
448
  resized_images.append(resized_img)
449
 
 
450
  total_height = sum(img.height for img in resized_images)
451
 
 
452
  new_im = Image.new('RGB', (min_width, total_height))
453
 
 
454
  y_offset = 0
455
  for im in resized_images:
456
  new_im.paste(im, (0, y_offset))
 
489
  remaining = len(images)
490
 
491
  while remaining > 0:
 
492
  for size in sorted(group_sizes, reverse=True):
 
 
493
  if remaining >= size or remaining == len(images):
494
  if remaining > size:
495
  new_group = images[-remaining: -remaining + size]
 
498
  groups.append(new_group)
499
  remaining -= size
500
  break
 
501
  elif remaining < min(group_sizes) and groups:
502
  groups[-1].extend(images[-remaining:])
503
  remaining = 0
 
606
  "prompt": "{prompt}",
607
  "negative_prompt": "",
608
  },
 
 
 
 
 
609
  {
610
  "name": "Cinematic",
611
  "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
612
  "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
613
  },
 
 
 
 
 
614
  {
615
  "name": "Photographic",
616
  "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
617
  "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
 
 
 
 
 
 
 
 
 
 
618
  }
619
  ]
620
 
 
624
  ip_ckpt = "./data/models/ip_adapter/sdxl_models/ip-adapter_sdxl_vit-h.bin"
625
  os.environ["no_proxy"] = "localhost,127.0.0.1,::1"
626
  STYLE_NAMES = list(styles.keys())
627
+ DEFAULT_STYLE_NAME = "Cinematic"
628
  global models_dict
629
  use_va = True
630
  models_dict = {
 
697
  self.id_bank[cur_step] = [hidden_states[:self.id_length], hidden_states[self.id_length:]]
698
  else:
699
  encoder_hidden_states = torch.cat((self.id_bank[cur_step][0].to(self.device),hidden_states[:1],self.id_bank[cur_step][1].to(self.device),hidden_states[1:]))
 
700
  if cur_step <=1:
701
  hidden_states = self.__call2__(attn, hidden_states,None,attention_mask,temb)
702
  else: # 256 1024 4096
 
949
 
950
  #################################################
951
  title = r"""
952
+ <h1 align="center">Demo for Consistent Self-Attention for Long-Range Image and Video Generation</h1>
953
  """
954
 
955
  description = r"""
 
 
956
  1️⃣ Enter a Textual Description for Character, if you add the Ref-Image, making sure to <b>follow the class word</b> you want to customize with the <b>trigger word</b>: `img`, such as: `man img` or `woman img` or `girl img`.<br>
957
  2️⃣ Enter the prompt array, each line corrsponds to one generated image.<br>
958
  3️⃣ Choose your preferred style template.<br>
959
  4️⃣ Click the <b>Submit</b> button to start customizing.
960
  """
961
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
962
  version = r"""
963
  <h3 align="center">StoryDiffusion Version 0.01 (test version)</h3>
964
  <h5 >1. Support image ref image. (Cartoon Ref image is not support now)</h5>