Spaces:
Paused
Paused
Create utils.py
Browse files- ip_adapter/utils.py +93 -0
ip_adapter/utils.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
import numpy as np
|
4 |
+
from PIL import Image
|
5 |
+
|
6 |
+
# Module-level registry filled by the forward hooks built in hook_fn:
# maps a module name to the ``attn_map`` taken from that module's processor.
attn_maps = {}
|
7 |
+
def hook_fn(name):
    """Build a forward hook that captures a processor's attention map.

    Args:
        name: Key under which the captured map is stored in the module-level
            ``attn_maps`` dict.

    Returns:
        A callable with the ``(module, inputs, outputs)`` forward-hook
        signature. When ``module.processor`` carries an ``attn_map``
        attribute, the hook copies the reference into ``attn_maps[name]`` and
        deletes the attribute from the processor; otherwise it does nothing.
    """

    def forward_hook(module, inputs, outputs):
        processor = module.processor
        if not hasattr(processor, "attn_map"):
            return
        attn_maps[name] = processor.attn_map
        # Remove the attribute so the processor does not keep its own
        # reference to the map after it has been recorded.
        del processor.attn_map

    return forward_hook
|
14 |
+
|
15 |
+
def register_cross_attention_hook(unet):
    """Register the attention-map capturing hook on every ``attn2*`` submodule.

    Walks ``unet.named_modules()`` and, for each module whose last dotted
    name component starts with ``"attn2"`` (presumably the cross-attention
    layers, going by this function's name), registers ``hook_fn(name)`` as a
    forward hook.

    Args:
        unet: Module exposing ``named_modules()``.

    Returns:
        The same ``unet`` object, to allow call chaining.
    """
    for qualified_name, submodule in unet.named_modules():
        leaf_name = qualified_name.rsplit('.', 1)[-1]
        if not leaf_name.startswith('attn2'):
            continue
        submodule.register_forward_hook(hook_fn(qualified_name))

    return unet
|
21 |
+
|
22 |
+
def upscale(attn_map, target_size):
    """Average an attention map over its first axis and resize it to ``target_size``.

    The input is averaged over dim 0, its two remaining axes are swapped, the
    (now last) flattened axis is unfolded into a 2D grid, and the grid is
    bilinearly interpolated to ``target_size``. A softmax over the first axis
    of the result is applied last.

    Args:
        attn_map: 3D tensor. Presumably ``(heads, height * width, tokens)``;
            the axis meaning is not visible in this file -- confirm against
            the attention processor that produces it.
        target_size: Two-element ``(height, width)`` of the output.

    Returns:
        A float32 tensor of shape ``(attn_map.shape[2], *target_size)``.

    Raises:
        AssertionError: If no downscale factor in ``{1, 2, 4, 8, 16}`` makes
            ``(height // scale) * (width // scale)`` equal to 64 times the
            flattened length of ``attn_map``.
    """
    attn_map = torch.mean(attn_map, dim=0)
    attn_map = attn_map.permute(1, 0)

    flat_len = attn_map.shape[1]
    temp_size = None
    for exponent in range(5):
        scale = 2 ** exponent
        # 64 == 8 * 8: the grid is target_size reduced by a further factor of
        # 8 per side (presumably the latent downsampling factor -- confirm).
        if (target_size[0] // scale) * (target_size[1] // scale) == flat_len * 64:
            temp_size = (target_size[0] // (scale * 8), target_size[1] // (scale * 8))
            break

    # Raised explicitly rather than via ``assert`` so the check survives
    # ``python -O``; the exception type is unchanged for existing callers.
    if temp_size is None:
        raise AssertionError(
            f"temp_size cannot be None: flattened length {flat_len} matches no "
            f"supported downscale of target_size {tuple(target_size)}"
        )

    attn_map = attn_map.view(attn_map.shape[0], *temp_size)

    attn_map = F.interpolate(
        attn_map.unsqueeze(0).to(dtype=torch.float32),
        size=target_size,
        mode='bilinear',
        align_corners=False,
    )[0]

    return torch.softmax(attn_map, dim=0)
|
46 |
+
def get_net_attn_map(image_size, batch_size=2, instance_or_negative=False, detach=True):
    """Upscale every captured attention map and average them into one tensor.

    Each tensor stored in the module-level ``attn_maps`` dict is split into
    ``batch_size`` chunks along dim 0, one chunk is selected, passed through
    ``upscale`` and the per-module results are averaged.

    Args:
        image_size: Target ``(height, width)`` forwarded to ``upscale``.
        batch_size: Number of chunks each stored map is split into.
        instance_or_negative: Selects chunk 0 when true, chunk 1 otherwise.
        detach: When true, each map is moved to CPU with ``.cpu()`` before
            processing. Note that, despite the name, ``.detach()`` is not
            called here.

    Returns:
        The element-wise mean of the upscaled maps over all recorded modules.
    """
    idx = 0 if instance_or_negative else 1
    net_attn_maps = []

    for attn_map in attn_maps.values():
        attn_map = attn_map.cpu() if detach else attn_map
        # Drop only the leading (chunk) axis. A bare ``squeeze()`` would also
        # collapse any other size-1 axis (e.g. a single head or token) and
        # hand ``upscale`` a tensor of the wrong rank.
        attn_map = torch.chunk(attn_map, batch_size)[idx].squeeze(0)
        net_attn_maps.append(upscale(attn_map, image_size))

    return torch.mean(torch.stack(net_attn_maps, dim=0), dim=0)
|
60 |
+
|
61 |
+
def attnmaps2images(net_attn_maps):
    """Convert attention maps into 8-bit grayscale PIL images.

    Each map is min-max normalised to the range 0..255 independently and cast
    to ``uint8`` before being wrapped with ``Image.fromarray``.

    Args:
        net_attn_maps: Iterable of tensors, one per output image.

    Returns:
        A list of PIL images in the same order as the input maps.
    """
    images = []

    for attn_map in net_attn_maps:
        # ``detach`` lets tensors that still track gradients be converted.
        attn_array = attn_map.detach().cpu().numpy()

        lowest = np.min(attn_array)
        value_range = np.max(attn_array) - lowest
        # A constant map has zero range; dividing by it would yield NaN and an
        # undefined uint8 cast, so fall back to 1 (the map becomes all zeros).
        divisor = value_range if value_range > 0 else 1

        normalized_attn_map = (attn_array - lowest) / divisor * 255
        images.append(Image.fromarray(normalized_attn_map.astype(np.uint8)))

    return images
|
80 |
+
def is_torch2_available():
    """Return True when ``torch.nn.functional`` exposes ``scaled_dot_product_attention``."""
    attribute_name = "scaled_dot_product_attention"
    return hasattr(F, attribute_name)
|
82 |
+
|
83 |
+
def get_generator(seed, device):
    """Create seeded ``torch.Generator`` object(s) on ``device``.

    Args:
        seed: ``None``, a single integer seed, or a list/tuple of integer
            seeds.
        device: Device passed to the ``torch.Generator`` constructor.

    Returns:
        ``None`` when ``seed`` is ``None``; a list with one seeded generator
        per entry when ``seed`` is a list or tuple; otherwise a single seeded
        generator.
    """
    if seed is None:
        return None

    # Tuples are treated like lists; previously a tuple was passed straight
    # to ``manual_seed`` and failed there.
    if isinstance(seed, (list, tuple)):
        return [torch.Generator(device).manual_seed(seed_item) for seed_item in seed]

    return torch.Generator(device).manual_seed(seed)
|