Adapter committed on
Commit 6832f38
Parent: 196678d

add free-hand and anything

Files changed (1)
  1. app.py +171 -94
app.py CHANGED
@@ -18,10 +18,14 @@ from torch import autocast
 from ldm.inference_base import (DEFAULT_NEGATIVE_PROMPT, diffusion_inference, get_adapters, get_sd_models)
 from ldm.modules.extra_condition import api
 from ldm.modules.extra_condition.api import (ExtraCondition, get_adapter_feature, get_cond_model)
+import numpy as np
+from ldm.util import read_state_dict
 
 torch.set_grad_enabled(False)
 
-supported_cond = ['style', 'color', 'sketch', 'openpose', 'depth', 'canny']
+supported_cond_map = ['style', 'color', 'sketch', 'openpose', 'depth', 'canny']
+supported_cond = ['style', 'color', 'sketch', 'sketch', 'openpose', 'depth', 'canny']
+draw_map = gr.Interface(lambda x: x, gr.Image(source="canvas"), gr.Image())
 
 # download the checkpoints
 urls = {
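For reference (not part of the commit): the UI registers one radio, two image inputs and one weight per entry of `supported_cond`, and `process.run()` recovers the per-condition tuples by slicing the flattened argument list back apart. The duplicated `'sketch'` entry is the new free-hand canvas slot, which reuses the sketch adapter, while `supported_cond_map` keeps driving the upload-style boxes. A minimal sketch of that layout with placeholder values:

```python
# Illustrative only: how the flattened Gradio input list maps back to
# per-condition (radio, image, drawing, weight) tuples inside process.run().
supported_cond = ['style', 'color', 'sketch', 'sketch', 'openpose', 'depth', 'canny']
n = len(supported_cond)            # 7 slots; the second 'sketch' is the free-hand canvas

btns    = ['Nothing'] * n          # radio values
ims1    = [None] * n               # uploaded images
ims2    = [None] * n               # processed / drawn images
weights = [1.0] * n                # per-condition weights
tail    = ['prompt', 'neg', 7.5, 1, 42, 50, 512, 1.0, 'v1-5-pruned-emaonly.ckpt']  # 9 global controls

args = btns + ims1 + ims2 + weights + tail

# Same slicing as run(): four lists of length n, then zip into per-condition tuples.
inps = [args[i:i + n] for i in range(0, len(args) - 9, n)]
per_condition = list(zip(*inps))
assert len(inps) == 4 and len(per_condition) == n
```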
@@ -35,6 +39,7 @@ urls = {
         "models/t2iadapter_sketch_sd15v2.pth"
     ],
     'runwayml/stable-diffusion-v1-5': ['v1-5-pruned-emaonly.ckpt'],
+    'CompVis/stable-diffusion-v-1-4-original':['sd-v1-4.ckpt'],
     'andite/anything-v4.0': ['anything-v4.0-pruned.ckpt', 'anything-v4.0.vae.pt'],
 }
 
@@ -93,93 +98,130 @@ global_opt.sampler = 'ddim'
 global_opt.cond_weight = 1.0
 global_opt.C = 4
 global_opt.f = 8
-
-# stable-diffusion model
-sd_model, sampler = get_sd_models(global_opt)
 # adapters and models to processing condition inputs
 adapters = {}
 cond_models = {}
 torch.cuda.empty_cache()
 
 
-def run(*args):
-    with torch.inference_mode(), \
-            sd_model.ema_scope(), \
-            autocast('cuda'):
-
-        inps = []
-        for i in range(0, len(args) - 8, len(supported_cond)):
-            inps.append(args[i:i + len(supported_cond)])
+def draw_transfer(im1):
+    c = im1[:, :, 0:3].astype(np.float32)
+    a = im1[:, :, 3:4].astype(np.float32) / 255.0
+    im1 = c * a + 255.0 * (1.0 - a)
+    im1 = (im1.clip(0, 255)).astype(np.uint8)
 
-        opt = copy.deepcopy(global_opt)
-        opt.prompt, opt.neg_prompt, opt.scale, opt.n_samples, opt.seed, opt.steps, opt.resize_short_edge, opt.cond_tau \
-            = args[-8:]
+    return im1
 
-        conds = []
-        activated_conds = []
+class process:
+    def __init__(self):
+        self.base_model = 'v1-5-pruned-emaonly.ckpt'
+        # stable-diffusion model
+        self.sd_model, self.sampler = get_sd_models(global_opt)
 
-        ims1 = []
-        ims2 = []
-        for idx, (b, im1, im2, cond_weight) in enumerate(zip(*inps)):
-            if idx > 0:
+    def run(self, *args):
+        opt = copy.deepcopy(global_opt)
+        opt.prompt, opt.neg_prompt, opt.scale, opt.n_samples, opt.seed, opt.steps, opt.resize_short_edge, opt.cond_tau, opt.base_model \
+            = args[-9:]
+        # check base model
+        if opt.base_model!=self.base_model:
+            ckpt = os.path.join("models", opt.base_model)
+            pl_sd = read_state_dict(ckpt)
+            if "state_dict" in pl_sd:
+                st = pl_sd["state_dict"]
+            else:
+                st = pl_sd
+            self.sd_model.load_state_dict(st, strict=False)
+            self.base_model = opt.base_model
+            if self.base_model!='v1-5-pruned-emaonly.ckpt' and self.base_model!='sd-v1-4.ckpt':
+                vae_sd = torch.load(os.path.join('models', 'anything-v4.0.vae.pt'), map_location="cuda")
+                st = vae_sd["state_dict"]
+                self.sd_model.first_stage_model.load_state_dict(st, strict=False)
+
+        with torch.inference_mode(), \
+                self.sd_model.ema_scope(), \
+                autocast('cuda'):
+
+            inps = []
+            for i in range(0, len(args) - 9, len(supported_cond)):
+                inps.append(args[i:i + len(supported_cond)])
+
+            conds = []
+            activated_conds = []
+
+            ims1 = []
+            ims2 = []
+            for idx, (b, im1, im2, cond_weight) in enumerate(zip(*inps)):
                 if b != 'Nothing' and (im1 is not None or im2 is not None):
+                    if im1 is not None and isinstance(im1,dict):
+                        im1 = im1['mask']
+                        im1 = draw_transfer(im1)
+
                     if im1 is not None:
                         h, w, _ = im1.shape
                     else:
                         h, w, _ = im2.shape
-                    # break
-        # resize all the images to the same size
-        for idx, (b, im1, im2, cond_weight) in enumerate(zip(*inps)):
-            if idx == 0:
+
+            # resize all the images to the same size
+            for idx, (b, im1, im2, cond_weight) in enumerate(zip(*inps)):
+                if idx == 0:
+                    ims1.append(im1)
+                    ims2.append(im2)
+                    continue
+                if b != 'Nothing':
+                    if im1 is not None and isinstance(im1,dict):
+                        im1 = im1['mask']
+                        im1 = draw_transfer(im1)
+                        im2 = im1
+                        cv2.imwrite('sketch.png', im1)
+                    if im1 is not None:
+                        im1 = cv2.resize(im1, (w, h), interpolation=cv2.INTER_CUBIC)
+                    if im2 is not None:
+                        im2 = cv2.resize(im2, (w, h), interpolation=cv2.INTER_CUBIC)
                 ims1.append(im1)
                 ims2.append(im2)
-                continue
-            if b != 'Nothing':
-                if im1 is not None:
-                    im1 = cv2.resize(im1, (w, h), interpolation=cv2.INTER_CUBIC)
-                if im2 is not None:
-                    im2 = cv2.resize(im2, (w, h), interpolation=cv2.INTER_CUBIC)
-            ims1.append(im1)
-            ims2.append(im2)
-
-        for idx, (b, _, _, cond_weight) in enumerate(zip(*inps)):
-            cond_name = supported_cond[idx]
-            if b == 'Nothing':
-                if cond_name in adapters:
-                    adapters[cond_name]['model'] = adapters[cond_name]['model'].cpu()
-            else:
-                activated_conds.append(cond_name)
-                if cond_name in adapters:
-                    adapters[cond_name]['model'] = adapters[cond_name]['model'].to(opt.device)
+
+            for idx, (b, _, _, cond_weight) in enumerate(zip(*inps)):
+                cond_name = supported_cond[idx]
+                if b == 'Nothing':
+                    if cond_name in adapters:
+                        adapters[cond_name]['model'] = adapters[cond_name]['model'].to(opt.device)#.cpu()
                 else:
-                    adapters[cond_name] = get_adapters(opt, getattr(ExtraCondition, cond_name))
-                adapters[cond_name]['cond_weight'] = cond_weight
+                    # print(idx,b)
+                    activated_conds.append(cond_name)
+                    if cond_name in adapters:
+                        adapters[cond_name]['model'] = adapters[cond_name]['model'].to(opt.device)
+                    else:
+                        adapters[cond_name] = get_adapters(opt, getattr(ExtraCondition, cond_name))
+                    adapters[cond_name]['cond_weight'] = cond_weight
 
-            process_cond_module = getattr(api, f'get_cond_{cond_name}')
+                process_cond_module = getattr(api, f'get_cond_{cond_name}')
 
-            if b == 'Image':
-                if cond_name not in cond_models:
-                    cond_models[cond_name] = get_cond_model(opt, getattr(ExtraCondition, cond_name))
-                conds.append(process_cond_module(opt, ims1[idx], 'image', cond_models[cond_name]))
-            else:
-                conds.append(process_cond_module(opt, ims2[idx], cond_name, None))
+                if b == 'Image':
+                    if cond_name not in cond_models:
+                        cond_models[cond_name] = get_cond_model(opt, getattr(ExtraCondition, cond_name))
+                    conds.append(process_cond_module(opt, ims1[idx], 'image', cond_models[cond_name]))
+                else:
+                    if idx == 2: # draw
+                        conds.append(process_cond_module(opt, (255.-ims2[idx]).astype(np.uint8), cond_name, None))
+                    else:
+                        conds.append(process_cond_module(opt, ims2[idx], cond_name, None))
 
-        adapter_features, append_to_context = get_adapter_feature(
-            conds, [adapters[cond_name] for cond_name in activated_conds])
+            adapter_features, append_to_context = get_adapter_feature(
+                conds, [adapters[cond_name] for cond_name in activated_conds])
 
-        output_conds = []
-        for cond in conds:
-            output_conds.append(tensor2img(cond, rgb2bgr=False))
+            output_conds = []
+            for cond in conds:
+                output_conds.append(tensor2img(cond, rgb2bgr=False))
 
-        ims = []
-        seed_everything(opt.seed)
-        for _ in range(opt.n_samples):
-            result = diffusion_inference(opt, sd_model, sampler, adapter_features, append_to_context)
-            ims.append(tensor2img(result, rgb2bgr=False))
+            ims = []
+            seed_everything(opt.seed)
+            for _ in range(opt.n_samples):
+                result = diffusion_inference(opt, self.sd_model, self.sampler, adapter_features, append_to_context)
+                ims.append(tensor2img(result, rgb2bgr=False))
 
-        # Clear GPU memory cache so less likely to OOM
-        torch.cuda.empty_cache()
-        return ims, output_conds
+            # Clear GPU memory cache so less likely to OOM
+            torch.cuda.empty_cache()
+            return ims, output_conds
 
 
 def change_visible(im1, im2, val):
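For reference (not part of the commit): `draw_transfer()` flattens the RGBA mask returned by the Gradio canvas onto a white background, so a free-hand drawing looks like an uploaded sketch; `run()` then inverts it (`255. - ims2[idx]`) in the `idx == 2` branch before handing it to the sketch condition. A self-contained sketch of the same compositing, with an illustrative function name:

```python
# Standalone illustration of the alpha-over-white flattening done by draw_transfer().
import numpy as np

def flatten_rgba_on_white(rgba: np.ndarray) -> np.ndarray:
    rgb = rgba[:, :, :3].astype(np.float32)
    alpha = rgba[:, :, 3:4].astype(np.float32) / 255.0
    out = rgb * alpha + 255.0 * (1.0 - alpha)   # composite the strokes over a white page
    return out.clip(0, 255).astype(np.uint8)

# A 2x2 RGBA canvas: one opaque black stroke pixel, the rest fully transparent.
canvas = np.zeros((2, 2, 4), dtype=np.uint8)
canvas[0, 0] = (0, 0, 0, 255)                   # drawn pixel
flat = flatten_rgba_on_white(canvas)
assert flat[0, 0].tolist() == [0, 0, 0]         # stroke stays black
assert flat[1, 1].tolist() == [255, 255, 255]   # empty area becomes white
```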
@@ -195,13 +237,14 @@ def change_visible(im1, im2, val):
         outputs[im2] = gr.update(visible=True)
     return outputs
 
-
-DESCRIPTION = '# [Composable T2I-Adapter](https://github.com/TencentARC/T2I-Adapter)'
+DESCRIPTION = '# [T2I-Adapter](https://github.com/TencentARC/T2I-Adapter)'
 
 DESCRIPTION += f'<p>Gradio demo for **T2I-Adapter**: [[GitHub]](https://github.com/TencentARC/T2I-Adapter), [[Paper]](https://arxiv.org/abs/2302.08453). If T2I-Adapter is helpful, please help to ⭐ the [Github Repo](https://github.com/TencentARC/T2I-Adapter) and recommend it to your friends 😊 </p>'
 
 DESCRIPTION += f'<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/Adapter/T2I-Adapter?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'
 
+processer = process()
+
 with gr.Blocks(css='style.css') as demo:
     gr.Markdown(DESCRIPTION)
 
@@ -215,7 +258,7 @@ with gr.Blocks(css='style.css') as demo:
             with gr.Box():
                 gr.Markdown("<h5><center>Style & Color</center></h5>")
                 with gr.Row():
-                    for cond_name in supported_cond[:2]:
+                    for cond_name in supported_cond_map[:2]:
                         with gr.Box():
                             with gr.Column():
                                 if cond_name == 'style':
@@ -232,6 +275,7 @@ with gr.Blocks(css='style.css') as demo:
                                     interactive=True,
                                     value="Nothing",
                                 )
+
                                 im1 = gr.Image(
                                     source='upload', label="Image", interactive=True, visible=False, type="numpy")
                                 im2 = gr.Image(
@@ -251,11 +295,38 @@ with gr.Blocks(css='style.css') as demo:
                                 ims1.append(im1)
                                 ims2.append(im2)
                                 cond_weights.append(cond_weight)
+
+            with gr.Box():
+                gr.Markdown("<h5><center>Drawing</center></h5>")
+                with gr.Column():
+                    btn1 = gr.Radio(
+                        choices=["Sketch", "Nothing"],
+                        label=f"Input type for drawing",
+                        interactive=True,
+                        value="Nothing")
+                    im1 = gr.Image(source='canvas', tool='color-sketch', label='Pay attention to adjusting stylus thickness!', visible=False)
+                    im2 = im1
+                    cond_weight = gr.Slider(
+                        label="Condition weight",
+                        minimum=0,
+                        maximum=5,
+                        step=0.05,
+                        value=1,
+                        interactive=True)
+
+                    fn = partial(change_visible, im1, im2)
+                    btn1.change(fn=fn, inputs=[btn1], outputs=[im1, im2], queue=False)
+
+                    btns.append(btn1)
+                    ims1.append(im1)
+                    ims2.append(im2)
+                    cond_weights.append(cond_weight)
+
         with gr.Column(scale=4):
             with gr.Box():
                 gr.Markdown("<h5><center>Structure</center></h5>")
                 with gr.Row():
-                    for cond_name in supported_cond[2:6]:
+                    for cond_name in supported_cond_map[2:6]:
                         with gr.Box():
                             with gr.Column():
                                 if cond_name == 'openpose':
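For reference (not part of the commit): the new Drawing box follows the same show/hide pattern as the other condition boxes, with `change_visible` revealing the canvas when the radio switches to "Sketch". A stripped-down, standalone sketch of that wiring, assuming the Gradio 3.x API the Space uses (`source='canvas'`, `tool='color-sketch'`):

```python
# Minimal standalone demo of toggling a drawing canvas from a radio button,
# assuming Gradio 3.x; change_visible() in app.py does the equivalent for im1/im2 pairs.
import gradio as gr

def toggle_canvas(choice):
    # Show the canvas only when the user picks "Sketch"
    return gr.update(visible=(choice == "Sketch"))

with gr.Blocks() as demo:
    btn = gr.Radio(choices=["Sketch", "Nothing"], value="Nothing", label="Input type for drawing")
    canvas = gr.Image(source="canvas", tool="color-sketch", visible=False)
    btn.change(fn=toggle_canvas, inputs=[btn], outputs=[canvas], queue=False)

# demo.launch()  # uncomment to try it locally
```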
@@ -272,6 +343,7 @@ with gr.Blocks(css='style.css') as demo:
                                     interactive=True,
                                     value="Nothing",
                                 )
+
                                 im1 = gr.Image(
                                     source='upload', label="Image", interactive=True, visible=False, type="numpy")
                                 im2 = gr.Image(
@@ -286,44 +358,46 @@ with gr.Blocks(css='style.css') as demo:
 
                                 fn = partial(change_visible, im1, im2)
                                 btn1.change(fn=fn, inputs=[btn1], outputs=[im1, im2], queue=False)
-
                                 btns.append(btn1)
                                 ims1.append(im1)
                                 ims2.append(im2)
                                 cond_weights.append(cond_weight)
 
-    with gr.Column():
-        prompt = gr.Textbox(label="Prompt")
-
-        with gr.Accordion('Advanced options', open=False):
-            neg_prompt = gr.Textbox(label="Negative Prompt", value=DEFAULT_NEGATIVE_PROMPT)
-            scale = gr.Slider(
-                label="Guidance Scale (Classifier free guidance)", value=7.5, minimum=1, maximum=20, step=0.1)
-            n_samples = gr.Slider(label="Num samples", value=1, minimum=1, maximum=1, step=1)
-            seed = gr.Slider(label="Seed", value=42, minimum=0, maximum=10000, step=1, randomize=True)
-            steps = gr.Slider(label="Steps", value=50, minimum=10, maximum=100, step=1)
-            resize_short_edge = gr.Slider(label="Image resolution", value=512, minimum=320, maximum=1024, step=1)
-            cond_tau = gr.Slider(
-                label="timestamp parameter that determines until which step the adapter is applied",
-                value=1.0,
-                minimum=0.1,
-                maximum=1.0,
-                step=0.05)
-
-    with gr.Row():
-        submit = gr.Button("Generate")
-        output = gr.Gallery().style(grid=2, height='auto')
-        cond = gr.Gallery().style(grid=2, height='auto')
+        with gr.Column():
+            base_model = gr.inputs.Radio(['v1-5-pruned-emaonly.ckpt', 'sd-v1-4.ckpt', 'anything-v4.0-pruned.ckpt'], type="value", default='v1-5-pruned-emaonly.ckpt', label='The base model you want to use. You can try more base models on https://civitai.com/.')
+            prompt = gr.Textbox(label="Prompt")
+            with gr.Accordion('Advanced options', open=False):
+                neg_prompt = gr.Textbox(label="Negative Prompt", value=DEFAULT_NEGATIVE_PROMPT)
+                scale = gr.Slider(
+                    label="Guidance Scale (Classifier free guidance)", value=7.5, minimum=1, maximum=20, step=0.1)
+                n_samples = gr.Slider(label="Num samples", value=1, minimum=1, maximum=1, step=1)
+                seed = gr.Slider(label="Seed", value=42, minimum=0, maximum=10000, step=1, randomize=True)
+                steps = gr.Slider(label="Steps", value=50, minimum=10, maximum=100, step=1)
+                resize_short_edge = gr.Slider(label="Image resolution", value=512, minimum=320, maximum=1024, step=1)
+                cond_tau = gr.Slider(
+                    label="timestamp parameter that determines until which step the adapter is applied",
+                    value=1.0,
+                    minimum=0.1,
+                    maximum=1.0,
+                    step=0.05)
+            submit = gr.Button("Generate")
+
+        with gr.Box():
+            gr.Markdown("<h5><center>Results</center></h5>")
+            with gr.Column():
+                output = gr.Gallery().style(grid=2, height='auto')
+                cond = gr.Gallery().style(grid=2, height='auto')
 
     inps = list(chain(btns, ims1, ims2, cond_weights))
 
-    inps.extend([prompt, neg_prompt, scale, n_samples, seed, steps, resize_short_edge, cond_tau])
-    submit.click(fn=run, inputs=inps, outputs=[output, cond])
+    inps.extend([prompt, neg_prompt, scale, n_samples, seed, steps, resize_short_edge, cond_tau, base_model])
+    submit.click(fn=processer.run, inputs=inps, outputs=[output, cond])
 
     ex = gr.Examples([
         [
             "Image",
             "Nothing",
+            "Nothing",
             "Image",
             "Nothing",
             "Nothing",
@@ -346,6 +420,7 @@ with gr.Blocks(css='style.css') as demo:
             1,
             1,
             1,
+            1,
             "master sword",
             "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
             7.5,
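For reference (not part of the commit): the `base_model` radio added above is appended as the last element of the input list, and `process.run()` reloads the checkpoint only when the selection changes, swapping in the anything-v4.0 VAE for that model. A simplified, standalone sketch of the same idea, assuming an ldm `LatentDiffusion`-style model whose autoencoder lives under `first_stage_model` (the helper name is illustrative and error handling is omitted):

```python
# Illustrative base-model hot-swap, mirroring the logic added to process.run().
import os
import torch

def swap_base_model(sd_model, current, requested, models_dir="models"):
    if requested == current:
        return current
    state = torch.load(os.path.join(models_dir, requested), map_location="cpu")
    state = state.get("state_dict", state)          # .ckpt files usually wrap weights in "state_dict"
    sd_model.load_state_dict(state, strict=False)   # strict=False tolerates EMA/extra keys
    if requested not in ('v1-5-pruned-emaonly.ckpt', 'sd-v1-4.ckpt'):
        # anything-v4.0 ships its own VAE; overwrite only the autoencoder weights
        vae = torch.load(os.path.join(models_dir, 'anything-v4.0.vae.pt'), map_location="cpu")
        sd_model.first_stage_model.load_state_dict(vae["state_dict"], strict=False)
    return requested
```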
@@ -358,6 +433,7 @@ with gr.Blocks(css='style.css') as demo:
         [
             "Image",
             "Nothing",
+            "Nothing",
             "Image",
             "Nothing",
             "Nothing",
@@ -380,6 +456,7 @@ with gr.Blocks(css='style.css') as demo:
             1,
             1,
             1,
+            1,
             "motorcycle",
             "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
             7.5,