georgefen committed
Commit 98cc8c1 • 1 Parent(s): 37210be

recover gpu support

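The commit reinstates the same module-level device-selection block in every file below, so the Space runs on CUDA when a GPU is available and falls back to CPU otherwise. A minimal sketch of that pattern, using only names that appear in the diff (the `move_to_selected_device` helper is illustrative, not part of the repo):

```python
import torch

# Device-selection pattern repeated at module level in the files changed below:
# a torch.device for .to() calls, plus a plain string for APIs that take a
# device *type*, e.g. torch.autocast(device_type=...).
if torch.cuda.is_available():
    device = torch.device("cuda")
    device_type = "cuda"
else:
    device = torch.device("cpu")
    device_type = "cpu"


def move_to_selected_device(module: torch.nn.Module) -> torch.nn.Module:
    # Illustrative helper (not in the repo): mirrors `model = model.to(device)` in app.py.
    return module.to(device)
```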
app.py CHANGED
@@ -16,9 +16,14 @@ from cldm.ddim_hacked import DDIMSampler
 import dlib
 from PIL import Image, ImageDraw
 
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
+
 model = create_model('./models/cldm_v15.yaml').cpu()
 model.load_state_dict(load_state_dict('./models/control_sd15_landmarks.pth', location='cpu'))
-model = model
+model = model.to(device)
 ddim_sampler = DDIMSampler(model)
 
 detector = dlib.get_frontal_face_detector()
@@ -56,7 +61,7 @@ def process(input_image, prompt, a_prompt, n_prompt, num_samples, image_resoluti
         detected_map = get_68landmarks_img(img)
         detected_map = HWC3(detected_map)
 
-        control = torch.from_numpy(detected_map.copy()).float() / 255.0
+        control = torch.from_numpy(detected_map.copy()).float().to(device) / 255.0
         control = torch.stack([control for _ in range(num_samples)], dim=0)
         control = einops.rearrange(control, 'b h w c -> b c h w').clone()
 
cldm/cldm.py CHANGED
@@ -18,6 +18,12 @@ from ldm.models.diffusion.ddpm import LatentDiffusion
 from ldm.util import log_txt_as_img, exists, instantiate_from_config
 from ldm.models.diffusion.ddim import DDIMSampler
 
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    device_type = "cuda"
+else:
+    device = torch.device("cpu")
+    device_type = "cpu"
 
 class ControlledUnetModel(UNetModel):
     def forward(self, x, timesteps=None, context=None, control=None, only_mid_control=False, **kwargs):
@@ -424,12 +430,12 @@ class ControlLDM(LatentDiffusion):
 
     def low_vram_shift(self, is_diffusing):
         if is_diffusing:
-            self.model = self.model.cpu()
-            self.control_model = self.control_model.cpu()
+            self.model = self.model.to(device)
+            self.control_model = self.control_model.to(device)
             self.first_stage_model = self.first_stage_model.cpu()
             self.cond_stage_model = self.cond_stage_model.cpu()
         else:
             self.model = self.model.cpu()
             self.control_model = self.control_model.cpu()
-            self.first_stage_model = self.first_stage_model.cpu()
-            self.cond_stage_model = self.cond_stage_model.cpu()
+            self.first_stage_model = self.first_stage_model.to(device)
+            self.cond_stage_model = self.cond_stage_model.to(device)
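With this change, `low_vram_shift(is_diffusing=True)` moves the diffusion UNet and the ControlNet to the selected device while keeping the VAE and text encoder on CPU, and `is_diffusing=False` swaps the two groups, so only the half of the pipeline currently in use occupies GPU memory. A self-contained toy sketch of that idea (the `TinyPipeline` class is a stand-in, not the repo's `ControlLDM`):

```python
import torch
import torch.nn as nn

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


class TinyPipeline:
    """Toy stand-in demonstrating the low_vram_shift idea from cldm/cldm.py."""

    def __init__(self):
        self.model = nn.Linear(4, 4)               # stands in for the diffusion UNet
        self.control_model = nn.Linear(4, 4)       # stands in for the ControlNet
        self.first_stage_model = nn.Linear(4, 4)   # stands in for the VAE
        self.cond_stage_model = nn.Linear(4, 4)    # stands in for the text encoder

    def low_vram_shift(self, is_diffusing: bool) -> None:
        if is_diffusing:
            # Denoising phase: only UNet + ControlNet on the accelerator.
            self.model = self.model.to(device)
            self.control_model = self.control_model.to(device)
            self.first_stage_model = self.first_stage_model.cpu()
            self.cond_stage_model = self.cond_stage_model.cpu()
        else:
            # Encoding/decoding phase: only VAE + text encoder on the accelerator.
            self.model = self.model.cpu()
            self.control_model = self.control_model.cpu()
            self.first_stage_model = self.first_stage_model.to(device)
            self.cond_stage_model = self.cond_stage_model.to(device)


pipe = TinyPipeline()
pipe.low_vram_shift(is_diffusing=True)
print(next(pipe.model.parameters()).device)  # cuda:0 when a GPU is available, else cpu
```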
cldm/ddim_hacked.py CHANGED
@@ -6,6 +6,10 @@ from tqdm import tqdm
 
 from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, extract_into_tensor
 
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
 
 class DDIMSampler(object):
     def __init__(self, model, schedule="linear", **kwargs):
@@ -16,8 +20,8 @@ class DDIMSampler(object):
 
     def register_buffer(self, name, attr):
        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cpu"):
-                attr = attr.to(torch.device("cpu"))
+            if attr.device != device:
+                attr = attr.to(device)
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
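The overridden `register_buffer` now places the sampler's schedule tensors on the selected device instead of forcing them to CPU, which keeps them on the same device as the model tensors they are combined with during sampling. A minimal standalone sketch of the pattern (the `BufferHolder` class is a toy, not the repo's `DDIMSampler`):

```python
import torch

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


class BufferHolder:
    """Toy illustration of the register_buffer pattern used by the sampler."""

    def register_buffer(self, name, attr):
        # Mirror the sampler's check: move tensors onto the chosen device before
        # storing them, so later arithmetic never mixes CPU and CUDA tensors.
        if isinstance(attr, torch.Tensor):
            if attr.device != device:
                attr = attr.to(device)
        setattr(self, name, attr)


holder = BufferHolder()
holder.register_buffer("alphas_cumprod", torch.linspace(1.0, 0.0, 10))
print(holder.alphas_cumprod.device)  # cuda:0 when a GPU is available, else cpu
```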
ldm/models/diffusion/ddim.py CHANGED
@@ -6,6 +6,12 @@ from tqdm import tqdm
 
 from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, extract_into_tensor
 
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    device_type = "cuda"
+else:
+    device = torch.device("cpu")
+    device_type = "cpu"
 
 class DDIMSampler(object):
     def __init__(self, model, schedule="linear", **kwargs):
@@ -16,8 +22,8 @@ class DDIMSampler(object):
 
     def register_buffer(self, name, attr):
        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cpu"):
-                attr = attr.to(torch.device("cpu"))
+            if attr.device != torch.device(device_type):
+                attr = attr.to(torch.device(device_type))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
ldm/modules/attention.py CHANGED
@@ -8,6 +8,12 @@ from typing import Optional, Any
 
 from ldm.modules.diffusionmodules.util import checkpoint
 
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    device_type = "cuda"
+else:
+    device = torch.device("cpu")
+    device_type = "cpu"
 
 try:
     import xformers
@@ -172,7 +178,7 @@ class CrossAttention(nn.Module):
 
         # force cast to fp32 to avoid overflowing
        if _ATTN_PRECISION =="fp32":
-            with torch.autocast(enabled=False, device_type = 'cpu'):
+            with torch.autocast(enabled=False, device_type = device_type):
                 q, k = q.float(), k.float()
                 sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
         else:
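In `CrossAttention.forward`, the fp32 guard disables autocast and upcasts `q`/`k` so the attention similarity matmul cannot overflow in half precision; hard-coding `device_type='cpu'` left any active CUDA autocast state untouched on GPU, so passing the detected `device_type` restores the guard there. A small self-contained sketch of that disable-and-upcast pattern, with toy tensors standing in for the attention queries and keys:

```python
import torch
from torch import einsum

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device_type = "cuda" if torch.cuda.is_available() else "cpu"

# Toy shapes standing in for attention queries/keys (not the repo's tensors).
dtype = torch.float16 if device_type == "cuda" else torch.float32
q = torch.randn(2, 16, 64, device=device, dtype=dtype)
k = torch.randn(2, 16, 64, device=device, dtype=dtype)
scale = 64 ** -0.5

# Disable autocast for the *active* device type, then upcast to fp32 so the
# similarity matmul runs in full precision and cannot overflow.
with torch.autocast(enabled=False, device_type=device_type):
    q32, k32 = q.float(), k.float()
    sim = einsum('b i d, b j d -> b i j', q32, k32) * scale
print(sim.dtype)  # torch.float32
```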
ldm/modules/diffusionmodules/util.py CHANGED
@@ -133,7 +133,7 @@ class CheckpointFunction(torch.autograd.Function):
     def backward(ctx, *output_grads):
         ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
         with torch.enable_grad(), \
-                torch.cpu.amp.autocast(**ctx.gpu_autocast_kwargs):
+                torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs):
             # Fixes a bug where the first op in run_function modifies the
             # Tensor storage in place, which is not allowed for detach()'d
             # Tensors.
ldm/modules/encoders/modules.py CHANGED
@@ -7,6 +7,12 @@ from transformers import T5Tokenizer, T5EncoderModel, CLIPTokenizer, CLIPTextMod
 import open_clip
 from ldm.util import default, count_params
 
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    device_type = "cuda"
+else:
+    device = torch.device("cpu")
+    device_type = "cpu"
 
 class AbstractEncoder(nn.Module):
     def __init__(self):
@@ -42,7 +48,7 @@ class ClassEmbedder(nn.Module):
         c = self.embedding(c)
         return c
 
-    def get_unconditional_conditioning(self, bs, device="cpu"):
+    def get_unconditional_conditioning(self, bs, device=device_type):
         uc_class = self.n_classes - 1  # 1000 classes --> 0 ... 999, one extra class for ucg (class 1000)
         uc = torch.ones((bs,), device=device) * uc_class
         uc = {self.key: uc}
@@ -57,7 +63,7 @@ def disabled_train(self, mode=True):
 
 class FrozenT5Embedder(AbstractEncoder):
     """Uses the T5 transformer encoder for text"""
-    def __init__(self, version="google/t5-v1_1-large", device="cpu", max_length=77, freeze=True):  # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
+    def __init__(self, version="google/t5-v1_1-large", device=device_type, max_length=77, freeze=True):  # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
         super().__init__()
         self.tokenizer = T5Tokenizer.from_pretrained(version)
         self.transformer = T5EncoderModel.from_pretrained(version)
@@ -92,7 +98,7 @@ class FrozenCLIPEmbedder(AbstractEncoder):
         "pooled",
         "hidden"
     ]
-    def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77,
+    def __init__(self, version="openai/clip-vit-large-patch14", device=device_type, max_length=77,
                  freeze=True, layer="last", layer_idx=None):  # clip-vit-base-patch32
         super().__init__()
         assert layer in self.LAYERS
@@ -140,11 +146,11 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder):
         "last",
         "penultimate"
     ]
-    def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device="cpu", max_length=77,
+    def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device=device_type, max_length=77,
                  freeze=True, layer="last"):
         super().__init__()
         assert layer in self.LAYERS
-        model, _, _ = open_clip.create_model_and_transforms(arch, device=torch.device('cpu'), pretrained=version)
+        model, _, _ = open_clip.create_model_and_transforms(arch, device=torch.device(device_type), pretrained=version)
         del model.visual
         self.model = model
 
@@ -194,7 +200,7 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder):
 
 
 class FrozenCLIPT5Encoder(AbstractEncoder):
-    def __init__(self, clip_version="openai/clip-vit-large-patch14", t5_version="google/t5-v1_1-xl", device="cpu",
+    def __init__(self, clip_version="openai/clip-vit-large-patch14", t5_version="google/t5-v1_1-xl", device=device_type,
                  clip_max_length=77, t5_max_length=77):
         super().__init__()
         self.clip_encoder = FrozenCLIPEmbedder(clip_version, device, max_length=clip_max_length)