Adapter committed on
Commit 8a4a8d3
1 Parent(s): e8e5f30
Files changed (3)
  1. app.py +2 -2
  2. demo/model.py +4 -5
  3. ldm/models/diffusion/plms.py +7 -5
app.py CHANGED
@@ -79,8 +79,8 @@ with gr.Blocks(css='style.css') as demo:
             create_demo_color(model.process_color)
         with gr.TabItem('Color + Sketch'):
             create_demo_color_sketch(model.process_color_sketch)
-        # with gr.TabItem('Style + Sketch'):
-        #     create_demo_style_sketch(model.process_style_sketch)
+        with gr.TabItem('Style + Sketch'):
+            create_demo_style_sketch(model.process_style_sketch)
         with gr.TabItem('Segmentation'):
             create_demo_seg(model.process_seg)
 demo.queue().launch(debug=True, server_name='0.0.0.0')
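The app.py change re-enables the previously commented-out 'Style + Sketch' tab so that create_demo_style_sketch(model.process_style_sketch) is registered alongside the other demos. As a minimal sketch only (not the actual demo code; the widgets and argument order are assumptions), a create_demo_* helper of this kind is typically wired like so:

# Hypothetical sketch of a create_demo_* helper; the real
# create_demo_style_sketch is the one imported by app.py.
import gradio as gr

def create_demo_style_sketch(process):
    # `process` is the callback bound to the tab, e.g. model.process_style_sketch.
    with gr.Blocks() as demo:
        with gr.Row():
            sketch = gr.Image(label='Sketch')
            style = gr.Image(label='Style image')
        prompt = gr.Textbox(label='Prompt')
        run_button = gr.Button('Run')
        result = gr.Gallery(label='Result')
        run_button.click(fn=process, inputs=[sketch, style, prompt], outputs=result)
    return demo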
demo/model.py CHANGED
@@ -177,8 +177,8 @@ class Model_all:
         # style part
         self.model_style = StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8).to(device)
         self.model_style.load_state_dict(torch.load("models/t2iadapter_style_sd14v1.pth", map_location=device))
-        self.clip_processor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')
-        self.clip_vision_model = CLIPVisionModel.from_pretrained('openai/clip-vit-large-patch14').to(device)
+        self.clip_processor = CLIPProcessor.from_pretrained('models/clip/8d052a0f05efbaefbc9e8786ba291cfdf93e5bff')
+        self.clip_vision_model = CLIPVisionModel.from_pretrained('models/clip/8d052a0f05efbaefbc9e8786ba291cfdf93e5bff').to(device)
 
         device = 'cpu'
         ## mmpose
@@ -878,9 +878,8 @@ class Model_all:
         elif type_in == 'Image':
             from ldm.modules.structure_condition.openpose.api import OpenposeInference
             model = OpenposeInference()
-            keypose = model(im)
-            im_pose = keypose.copy()[:,:,::-1]
-            # keypose = img2tensor(keypose).unsqueeze(0) / 255.
+            keypose = model(im[:,:,::-1])
+            im_pose = keypose.copy()
 
         # extract condition features
         c = self.base_model.get_learned_conditioning([prompt + ', ' + pos_prompt])
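The demo/model.py hunks carry two independent fixes: the CLIP processor and vision model are now loaded from a local snapshot directory (whose name looks like a Hub commit hash) instead of pulling 'openai/clip-vit-large-patch14' from the Hub at startup, and the keypose branch now reverses the channel order of the input image before running OpenposeInference instead of flipping the rendered pose afterwards. A minimal, self-contained sketch of both ideas; the repo id and the BGR/RGB direction are assumptions rather than anything stated in the commit:

import numpy as np
from huggingface_hub import snapshot_download

# Pre-fetch the CLIP weights once; snapshot_download returns a local
# snapshot directory named after the repo commit hash (e.g.
# .../snapshots/8d052a0f05efbaefbc9e8786ba291cfdf93e5bff), which can then
# be copied under models/clip/ and passed to from_pretrained().
local_clip_dir = snapshot_download(repo_id='openai/clip-vit-large-patch14')
print(local_clip_dir)

# Channel-order fix: reversing the last axis converts BGR to RGB (or the
# other way around) before the array is handed to the pose estimator.
im = np.zeros((512, 512, 3), dtype=np.uint8)
im_flipped = im[:, :, ::-1]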
ldm/models/diffusion/plms.py CHANGED
@@ -117,7 +117,7 @@ class PLMSSampler(object):
                                     features_adapter2=copy.deepcopy(features_adapter2),
                                     mode = mode,
                                     con_strength = con_strength,
-                                    style_feature=style_feature
+                                    style_feature=style_feature#.clone()
                                     )
         return samples, intermediates
 
@@ -152,6 +152,8 @@ class PLMSSampler(object):
             index = total_steps - i - 1
             ts = torch.full((b,), step, device=device, dtype=torch.long)
             ts_next = torch.full((b,), time_range[min(i + 1, len(time_range) - 1)], device=device, dtype=torch.long)
+            cond_in = cond
+            unconditional_conditioning_in = unconditional_conditioning
 
             if mask is not None :#and index>=10:
                 assert x0 is not None
@@ -170,20 +172,20 @@ class PLMSSampler(object):
                     features_adapter = features_adapter1
 
                 if index>25:
-                    cond = torch.cat([cond, style_feature], dim=1)
-                    unconditional_conditioning = torch.cat(
+                    cond_in = torch.cat([cond, style_feature.clone()], dim=1)
+                    unconditional_conditioning_in = torch.cat(
                         [unconditional_conditioning, unconditional_conditioning[:, -8:, :]], dim=1)
             elif mode == 'mul':
                 features_adapter = [a1i*0.5 + a2i for a1i, a2i in zip(features_adapter1, features_adapter2)]
             else:
                 features_adapter = features_adapter1
 
-            outs = self.p_sample_plms(img, cond, ts, index=index, use_original_steps=ddim_use_original_steps,
+            outs = self.p_sample_plms(img, cond_in, ts, index=index, use_original_steps=ddim_use_original_steps,
                                       quantize_denoised=quantize_denoised, temperature=temperature,
                                       noise_dropout=noise_dropout, score_corrector=score_corrector,
                                       corrector_kwargs=corrector_kwargs,
                                       unconditional_guidance_scale=unconditional_guidance_scale,
-                                      unconditional_conditioning=unconditional_conditioning,
+                                      unconditional_conditioning=unconditional_conditioning_in,
                                       old_eps=old_eps, t_next=ts_next, features_adapter=copy.deepcopy(features_adapter))
 
             img, pred_x0, e_t = outs
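The plms.py change stops rebinding cond and unconditional_conditioning inside the sampling loop. With the old code, every step where index > 25 concatenated the style tokens onto the tensors produced by the previous step, so the conditioning grew across iterations and the extended tensors carried over into later steps as well. The new cond_in / unconditional_conditioning_in variables are rebuilt from the untouched originals on every iteration. A minimal, self-contained illustration of the difference (77 text tokens and 8 style tokens are assumed sizes for the example):

import torch

cond = torch.zeros(1, 77, 768)          # text conditioning
style_feature = torch.zeros(1, 8, 768)  # 8 style tokens

# Old pattern: rebinding `cond` makes it grow on every styled step.
for index in (30, 29, 28):
    cond = torch.cat([cond, style_feature], dim=1)
print(cond.shape)  # torch.Size([1, 101, 768])  -> 77 + 3 * 8

# New pattern: build a fresh cond_in each step; `cond` stays at 77 tokens.
cond = torch.zeros(1, 77, 768)
for index in (30, 29, 28):
    cond_in = torch.cat([cond, style_feature], dim=1)
print(cond.shape, cond_in.shape)  # torch.Size([1, 77, 768]) torch.Size([1, 85, 768])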