jeasinema committed
Commit 1bae6ea
1 Parent(s): f40b820

update gradio examples

app.py CHANGED
@@ -12,29 +12,29 @@ pipe = StableDiffusion3InstructPix2PixPipeline.from_pretrained("BleachNick/SD3_U
 pipe = pipe.to("cuda")
 
 
-
 @spaces.GPU(duration=20)
 def generate(image_mask, prompt, num_inference_steps=50, image_guidance_scale=1.6, guidance_scale=7.5, seed=255):
     def is_blank_mask(mask_img):
-        # Convert the mask to a numpy array and check if all values are 0 (black/transparent)
         mask_array = np.array(mask_img.convert('L'))  # Convert to luminance to simplify the check
         return np.all(mask_array == 0)
-    # Set the seed for reproducibility
+
     seed = int(seed)
     generator = torch.manual_seed(seed)
 
     img = image_mask["background"].convert("RGB")
     mask_img = image_mask["layers"][0].getchannel('A').convert("RGB")
 
-    # Central crop to desired size
     desired_size = (512, 512)
 
     img = ImageOps.fit(img, desired_size, method=Image.LANCZOS, centering=(0.5, 0.5))
     mask_img = ImageOps.fit(mask_img, desired_size, method=Image.LANCZOS, centering=(0.5, 0.5))
 
     if is_blank_mask(mask_img):
-        # Create a mask of the same size with all values set to 255 (white)
         mask_img = PIL.Image.new('RGB', img.size, color=(255, 255, 255))
+        editing_mode = "Free-form"
+    else:
+        editing_mode = "Region-based"
+
     mask_img = mask_img.convert('RGB')
 
     image = pipe(
@@ -47,7 +47,8 @@ def generate(image_mask, prompt, num_inference_steps=50, image_guidance_scale=1.
         generator=generator
     ).images[0]
 
-    return image
+    return image, f"Editing Mode: {editing_mode}"
+
 
 example_lists=[
 
@@ -64,36 +65,30 @@ example_lists=[
 ]
 mask_ex_list = []
 for exp in example_lists:
-    ex_dict= {}
+    ex_dict = {}
     ex_dict['background'] = exp[0][0]
-    ex_dict['layers'] = [exp[0][1],exp[0][2]]
-    ex_dict['composite'] = exp[0][2]
-    re_list = [ex_dict, exp[1],exp[2],exp[3],exp[4],exp[5]]
+    ex_dict['layers'] = [exp[0][1], exp[0][2]]
+    ex_dict['composite'] = exp[0][2]
+    re_list = [ex_dict, exp[1], exp[2], exp[3], exp[4], exp[5]]
     mask_ex_list.append(re_list)
 
-# image_mask_input = gr.ImageMask(label="Input Image", type="pil", brush_color="#000000", elem_id="inputmask",
-#                                 shape=(512, 512))
-image_mask_input = gr.ImageMask(sources='upload',type="pil",label="Input Image: Mask with pen or leave unmasked",transforms=(),layers=False)
+image_mask_input = gr.ImageMask(sources='upload', type="pil", label="Input Image: Mask with pen or leave unmasked", transforms=(), layers=False)
 prompt_input = gr.Textbox(label="Prompt")
 num_inference_steps_input = gr.Slider(minimum=0, maximum=100, value=50, label="Number of Inference Steps")
 image_guidance_scale_input = gr.Slider(minimum=0.0, maximum=2.5, value=1.5, label="Image Guidance Scale")
 guidance_scale_input = gr.Slider(minimum=0.0, maximum=17.5, value=12.5, label="Guidance Scale")
 seed_input = gr.Textbox(value="255", label="Random Seed")
 
-inputs = [image_mask_input, prompt_input, num_inference_steps_input, image_guidance_scale_input, guidance_scale_input,
-          seed_input]
-outputs = gr.Image(label="Generated Image")
-
+inputs = [image_mask_input, prompt_input, num_inference_steps_input, image_guidance_scale_input, guidance_scale_input, seed_input]
+outputs = [gr.Image(label="Generated Image"), gr.Text(label="Editing Mode")]
 
-# Custom HTML content
 article_html = """
 <div style="text-align: center; max-width: 1000px; margin: 20px auto; font-family: Arial, sans-serif;">
     <h2 style="font-weight: 900; font-size: 2.5rem; margin-bottom: 0.5rem;">
         🖼️ UltraEdit for Fine-Grained Image Editing
     </h2>
     <div style="margin-bottom: 1rem;">
-        <h3 style="font-weight: 500; font-size: 1.25rem; margin: 0;">
-        </h3>
+        <h3 style="font-weight: 500; font-size: 1.25rem; margin: 0;"></h3>
         <p style="font-weight: 400; font-size: 1rem; margin: 0.5rem 0;">
             Haozhe Zhao<sup>1*</sup>, Xiaojian Ma<sup>2*</sup>, Liang Chen<sup>1</sup>, Shuzheng Si<sup>1</sup>, Rujie Wu<sup>1</sup>,
             Kaikai An<sup>1</sup>, Peiyu Yu<sup>3</sup>, Minjia Zhang<sup>4</sup>, Qing Li<sup>2</sup>, Baobao Chang<sup>2</sup>
@@ -109,7 +104,7 @@ article_html = """
         <a href="https://huggingface.co/datasets/BleachNick/UltraEdit_500k" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
             <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Dataset_500k" style="height: 20px; vertical-align: middle;"> Dataset_500k
         </a>
-        <a href="https://ultra-editing.github.io/" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
+        <a href="https://ultra-editing.github.io/" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
             <span style="font-size: 20px; vertical-align: middle;">🔗</span> Page
         </a>
         <a href="https://huggingface.co/BleachNick/SD3_UltraEdit_w_mask" style="display: flex; align-items: center; text-decoration: none; color: blue; font-weight: bold; gap: 0.5rem;">
@@ -121,29 +116,36 @@ article_html = """
     </div>
     <div style="text-align: left; margin: 0 auto; font-size: 1rem; line-height: 1.5;">
         <p>
-            <b>UltraEdit</b> is a dataset designed for fine-grained, instruction-based image editing. It contains over 4 million free-form image editing samples and more than 100,000 region-based image editing samples, automatically generated with real images as anchors.
+            <b>UltraEdit</b> is a dataset designed for fine-grained, instruction-based image editing. It contains over 4 million free-form image editing samples and more than 100,000 region-based image editing samples, automatically generated with real images as anchors.
         </p>
         <p>
-            This demo allows you to perform image editing using the <a href="https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers" style="color: blue; text-decoration: none;">Stable Diffusion 3</a> model trained with this extensive dataset. It supports both free-form (without mask) and region-based (with mask) image editing. Use the sliders to adjust the inference steps and guidance scales, and provide a seed for reproducibility. The image guidance scale of 1.5 and text guidance scale of 7.5 / 12.5 is a good start for free-from/region-based image editing.
+            This demo allows you to perform image editing using the <a href="https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers" style="color: blue; text-decoration: none;">Stable Diffusion 3</a> model trained with this extensive dataset. It supports both free-form (without mask) and region-based (with mask) image editing. Use the sliders to adjust the inference steps and guidance scales, and provide a seed for reproducibility. The image guidance scale of 1.5 and text guidance scale of 7.5 / 12.5 is a good start for free-form/region-based image editing.
         </p>
+        <p>
+            <b>Usage Instructions:</b> You need to upload the images and prompts for editing. Use the pen tool to mark the areas you want to edit. If no region is marked, it will resort to free-form editing.
+        </p>
     </div>
 </div>
 """
-html='''
+html = '''
 <div style="text-align: left; margin-top: 2rem; font-size: 0.85rem; color: gray;">
-    <p>
-        <b>Usage Instructions:</b> You need to upload the images and prompts for editing. Use the pen tool to mark the areas you want to edit. If no region is marked, it will resort to free-form editing.
-    </p>
+    <b>Limitations:</b>
+    <ul>
+        <li>We have not conducted any NSFW checks;</li>
+        <li>Due to the bias of the generated models, the model performance is still weak when dealing with high-frequency information such as <b>human facial expressions or text in the images</b>;</li>
+        <li>We unified the free-form and region-based image editing by adding an extra channel of the mask image to the dataset. When doing free-form image editing, the network receives a blank mask.</li>
+        <li>The generation result is sensitive to the guidance scale. For text guidance, based on experience, free-form image editing will perform better with a relatively low guidance score (7.5 or lower), while region-based image editing will perform better with a higher guidance score.</li>
+    </ul>
 </div>
 '''
+
 demo = gr.Interface(
     fn=generate,
     inputs=inputs,
     outputs=outputs,
-    description=article_html,  # Add article parameter
-    article = html,
+    description=article_html,
+    article=html,
     examples=mask_ex_list
 )
 
-demo.queue().launch()
-
+demo.queue().launch()
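The functional core of this commit is that `generate` now reports whether a run was free-form or region-based. Below is a minimal, self-contained sketch of that mask-handling logic, using only PIL and NumPy as `app.py` does; the `user_mask` variable is illustrative, standing in for the mask layer that `gr.ImageMask` delivers.

```python
import numpy as np
import PIL.Image

def is_blank_mask(mask_img):
    # Convert to luminance; an all-zero array means nothing was drawn.
    mask_array = np.array(mask_img.convert('L'))
    return np.all(mask_array == 0)

# Illustrative stand-in for the mask gr.ImageMask hands to generate():
# a freshly created black image, i.e. an untouched canvas.
user_mask = PIL.Image.new('RGB', (512, 512), color=(0, 0, 0))

if is_blank_mask(user_mask):
    # No region marked: substitute an all-white mask so the whole image is editable.
    mask_img = PIL.Image.new('RGB', user_mask.size, color=(255, 255, 255))
    editing_mode = "Free-form"
else:
    mask_img = user_mask
    editing_mode = "Region-based"

print(f"Editing Mode: {editing_mode}")  # -> Editing Mode: Free-form
```

The reworked `ex_dict` entries mirror the same idea on the examples side: each one carries the `background`, `layers`, and `composite` keys that `gr.ImageMask` produces, so cached examples flow through the identical code path as live user input.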
 
gradio_cached_examples/18/Generated Image/39941882bacd2ca28afc/image.webp ADDED
gradio_cached_examples/18/Generated Image/5f053b78177fa3272b0c/image.webp ADDED
gradio_cached_examples/18/indices.csv ADDED
@@ -0,0 +1,2 @@
+3
+4
gradio_cached_examples/18/log.csv ADDED
@@ -0,0 +1,3 @@
+Generated Image,flag,username,timestamp
+"{""path"": ""gradio_cached_examples/18/Generated Image/5f053b78177fa3272b0c/image.webp"", ""url"": ""/file=/tmp/gradio/8043730bd61f29a1a30304ac15699d0064c2531e/image.webp"", ""size"": null, ""orig_name"": ""image.webp"", ""mime_type"": null, ""is_stream"": false, ""meta"": {""_type"": ""gradio.FileData""}}",,,2024-07-03 08:58:08.623689
+"{""path"": ""gradio_cached_examples/18/Generated Image/39941882bacd2ca28afc/image.webp"", ""url"": ""/file=/tmp/gradio/8270e583b13f74410600120bfecac4875318da12/image.webp"", ""size"": null, ""orig_name"": ""image.webp"", ""mime_type"": null, ""is_stream"": false, ""meta"": {""_type"": ""gradio.FileData""}}",,,2024-07-03 09:00:30.281946
gradio_cached_examples/20/Generated Image/1c15f96417dfc872dde9/image.webp ADDED
gradio_cached_examples/20/Generated Image/7003c566c19b7f282686/image.webp ADDED
gradio_cached_examples/20/Generated Image/cb25215afd568f92f343/image.webp ADDED
gradio_cached_examples/20/Generated Image/ffb6d87910f6ca2b2f72/image.webp ADDED
gradio_cached_examples/20/indices.csv ADDED
@@ -0,0 +1,4 @@
+4
+2
+0
+3
gradio_cached_examples/20/log.csv ADDED
@@ -0,0 +1,5 @@
+Generated Image,Editing Mode,flag,username,timestamp
+"{""path"": ""gradio_cached_examples/20/Generated Image/1c15f96417dfc872dde9/image.webp"", ""url"": ""/file=/tmp/gradio/01a2766c512a956cf5245499b443d51f042249da/image.webp"", ""size"": null, ""orig_name"": ""image.webp"", ""mime_type"": null, ""is_stream"": false, ""meta"": {""_type"": ""gradio.FileData""}}",Editing Mode: Free-form,,,2024-07-03 09:14:45.183288
+"{""path"": ""gradio_cached_examples/20/Generated Image/ffb6d87910f6ca2b2f72/image.webp"", ""url"": ""/file=/tmp/gradio/ddb27dadd6deb2e3e9203dc5a4eb32070bf51da9/image.webp"", ""size"": null, ""orig_name"": ""image.webp"", ""mime_type"": null, ""is_stream"": false, ""meta"": {""_type"": ""gradio.FileData""}}",Editing Mode: Region-based,,,2024-07-03 09:15:49.425530
+"{""path"": ""gradio_cached_examples/20/Generated Image/cb25215afd568f92f343/image.webp"", ""url"": ""/file=/tmp/gradio/17eb2aa4890218abb35c8edb6529dbd890474973/image.webp"", ""size"": null, ""orig_name"": ""image.webp"", ""mime_type"": null, ""is_stream"": false, ""meta"": {""_type"": ""gradio.FileData""}}",Editing Mode: Region-based,,,2024-07-03 09:16:23.823989
+"{""path"": ""gradio_cached_examples/20/Generated Image/7003c566c19b7f282686/image.webp"", ""url"": ""/file=/tmp/gradio/f914d44ebf581bc784c804cafe98c6eaa6bd1139/image.webp"", ""size"": null, ""orig_name"": ""image.webp"", ""mime_type"": null, ""is_stream"": false, ""meta"": {""_type"": ""gradio.FileData""}}",Editing Mode: Region-based,,,2024-07-03 09:21:13.664147