eggarsway commited on
Commit
b0369c2
1 Parent(s): 7ea5b16
DirectedDiffusion/AttnCore.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ORIGINAL SOURCE CODE
3
+ https://github.com/huggingface/diffusers/blob/91ddd2a25b848df0fa1262d4f1cd98c7ccb87750/src/diffusers/models/attention.py#L276
4
+ """
5
+
6
+ import torch
7
+ import math
8
+ import copy
9
+ import numpy as np
10
+
11
def gaussian_2d(x=0, y=0, mx=0, my=0, sx=1, sy=1):
    """Evaluate an axis-aligned 2-D Gaussian density.

    Args:
        x: sample x coordinate(s); a tensor or a Python number.
        y: sample y coordinate(s); a tensor or a Python number.
        mx: mean along x.
        my: mean along y.
        sx: standard deviation along x.
        sy: standard deviation along y.

    Returns:
        torch.Tensor: the density at every (x, y) sample, following the usual
        broadcasting rules between ``x`` and ``y``.
    """
    exponent = -((x - mx) ** 2 / (2 * sx ** 2) + (y - my) ** 2 / (2 * sy ** 2))
    # torch.exp only accepts tensors; wrapping here keeps plain-number calls
    # (including the all-default call) working. Tensors pass through untouched.
    exponent = torch.as_tensor(exponent)
    return 1 / (2 * math.pi * sx * sy) * torch.exp(exponent)
22
+
23
def init_attention_edit(
    unet, tokens=[], rios=[], num_trailing_attn=[], length_prompt=0, noise_scale=[],
):
    """Patch every ``CrossAttention`` module of ``unet`` with a region-aware attention function.

    Each region ``i`` is described by the ``i``-th entry of ``rios``, ``tokens``,
    ``num_trailing_attn`` and ``noise_scale``.

    Args:
        unet: model whose ``named_modules()`` are scanned; modules whose class
            name is ``"CrossAttention"`` receive a new ``_attention`` method
            and a set of control flags.
        tokens: per-region lists of token indices to edit.
        rios: per-region ``(left, right, top, bottom)`` boxes, expressed as
            fractions of the attention map's side length.
        num_trailing_attn: per-region number of token indices following the
            prompt that are edited together with ``tokens[i]``.
        length_prompt: prompt length; trailing indices start at
            ``length_prompt + 1``.
        noise_scale: per-region multiplier, combined with the module's
            ``noise_level`` when ``use_add_noise`` is set.

    The patched attention only edits when the module's ``use_edited_attn`` flag
    is set and the key sequence length is 77; it resets ``use_edited_attn`` and
    ``use_add_noise`` to ``False`` after one edited call. The attention
    probabilities of the latest call are kept in ``module.attn``.
    """
    # Private copies, so later caller-side mutation cannot alter the patched behaviour.
    rios_ = copy.deepcopy(rios)
    tokens_ = copy.deepcopy(tokens)
    noise_scale_ = copy.deepcopy(noise_scale)
    num_regions = len(rios_)

    for i in range(num_regions):
        # Extend each region's token list with the trailing indices that follow
        # the prompt, never going past index 76 (the last of the 77 positions).
        first_trailing = length_prompt + 1
        stop_trailing = min(first_trailing + num_trailing_attn[i], 77)
        tokens_[i].extend(range(first_trailing, stop_trailing))
        tokens_[i] = list(set(tokens_[i]))

    def new_attention(self, query, key, value):
        attn_slice = torch.matmul(query, key.transpose(-1, -2)) * self.scale
        attn_slice = attn_slice.softmax(dim=-1)

        if self.use_edited_attn and attn_slice.size()[2] == 77:
            # Leading dimension taken from the tensor instead of a hard-coded 8,
            # so other head counts / batch sizes work as well.
            heads = attn_slice.size()[0]
            dim = int(np.sqrt(attn_slice.size()[1]))
            attn_slice = attn_slice.view(heads, dim, dim, 77)

            # True marks (position, token) pairs that keep their attention.
            # Everything left False -- including every token that belongs to no
            # region -- is scaled by 0.01 below.
            global_mask = torch.zeros_like(attn_slice, dtype=torch.bool)
            for i in range(num_regions):
                region_tokens = tokens_[i]
                left = int(dim * rios_[i][0])
                right = int(dim * rios_[i][1])
                top = int(dim * rios_[i][2])
                bottom = int(dim * rios_[i][3])

                # add noise to avoid nullity
                if self.use_add_noise:
                    tmp = attn_slice[
                        :, top:bottom, left:right, region_tokens
                    ].clone() * (noise_scale_[i] * self.noise_level)
                    # TODO: the uniform distribution on the region
                    h = tmp.shape[1]
                    w = tmp.shape[2]
                    x = torch.linspace(0, h, h)
                    y = torch.linspace(0, w, w)
                    x, y = torch.meshgrid(x, y, indexing="ij")
                    # Gaussian bump centred on the region.
                    noise = gaussian_2d(
                        x,
                        y,
                        mx=int(h / 2),
                        my=int(w / 2),
                        sx=float(h) / 2.0,
                        sy=float(w) / 2.0,
                    )
                    noise = (
                        noise.unsqueeze(0)
                        .unsqueeze(-1)
                        .repeat(tmp.shape[0], 1, 1, tmp.shape[-1])
                        .to(attn_slice.device)
                    )
                    attn_slice[:, top:bottom, left:right, region_tokens] = tmp + noise

                # Keep only the inside of the box for this region's tokens.
                mask = torch.ones_like(attn_slice, dtype=torch.bool)
                mask[:, :, right:, region_tokens] = False
                mask[:, :, :left, region_tokens] = False
                mask[:, :top, :, region_tokens] = False
                mask[:, bottom:, :, region_tokens] = False
                global_mask[..., region_tokens] |= mask[..., region_tokens]

                # Intersect with the union of the box's row band and column band.
                mask = torch.zeros_like(attn_slice, dtype=torch.bool)
                mask[:, top:bottom, :, region_tokens] = True
                mask[:, :, left:right, region_tokens] = True
                global_mask[..., region_tokens] &= mask[..., region_tokens]

            zeros_indices = torch.where(global_mask == False)
            global_mask = global_mask.clone().detach().half()
            # Damp (rather than zero out) everything outside the kept areas.
            global_mask[zeros_indices] = 0.01
            attn_slice *= global_mask
            attn_slice = attn_slice.view(heads, dim * dim, 77)
            # One-shot flags: the caller must re-arm them before the next edited call.
            self.use_add_noise = False
            self.use_edited_attn = False

        self.attn = attn_slice.clone()
        hidden_states = torch.matmul(attn_slice, value)
        # reshape hidden_states, e.g., torch.Size([1, 4096, 320])
        hidden_states = self.reshape_batch_dim_to_heads(hidden_states)
        return hidden_states

    for name, module in unet.named_modules():
        module_name = type(module).__name__
        if module_name == "CrossAttention":
            module.last_attn_slice = None
            module.use_last_attn_slice = False
            module.use_last_attn_weights = False
            module.use_edited_attn = False
            module.save_last_attn_slice = False
            module.use_add_noise = False
            module.noise_level = 0.0
            module.attn = None
            # Bind as a method so ``self`` is the patched module.
            module._attention = new_attention.__get__(module, type(module))
DirectedDiffusion/AttnEditorUtils.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import os
3
+ import numpy as np
4
+ import torchvision
5
+ from PIL import Image
6
+ from transformers import CLIPModel, CLIPTextModel, CLIPTokenizer, CLIPProcessor
7
+ from diffusers import AutoencoderKL, UNet2DConditionModel
8
+
9
+
10
def get_embeds(prompt, clip, clip_tokenizer, device="cuda"):
    """Tokenize ``prompt`` and return the text encoder's last hidden state.

    Args:
        prompt: text passed to ``clip_tokenizer``.
        clip: callable text encoder; its result must expose ``last_hidden_state``.
        clip_tokenizer: tokenizer; the prompt is padded/truncated to its
            ``model_max_length``.
        device: device the token ids are moved to before encoding.

    Returns:
        The ``last_hidden_state`` produced by ``clip`` for the token ids.
    """
    tokenizer_options = dict(
        padding="max_length",
        max_length=clip_tokenizer.model_max_length,
        truncation=True,
        return_tensors="pt",
        return_overflowing_tokens=True,
    )
    input_ids = clip_tokenizer(prompt, **tokenizer_options).input_ids
    encoded = clip(input_ids.to(device))
    return encoded.last_hidden_state
21
+
22
+
23
@torch.no_grad()
def get_image_from_latent(vae, latent):
    """Decode ``latent`` with ``vae`` and return the first image of the batch as a PIL image.

    Args:
        vae: decoder exposing ``decode(...)`` (whose result has ``.sample``) and ``dtype``.
        latent: latent tensor; it is divided by 0.18215 before decoding
            (presumably the inverse of the factor applied when encoding -- confirm).

    Returns:
        PIL.Image.Image: 8-bit image built from the first batch element.
    """
    unscaled = latent / 0.18215
    decoded = vae.decode(unscaled.to(vae.dtype)).sample
    # Map the decoder output from [-1, 1] to [0, 1], clipping anything outside.
    decoded = (decoded / 2 + 0.5).clamp(0, 1)
    # Channels-first -> channels-last, as expected by Image.fromarray.
    pixels = decoded.cpu().permute(0, 2, 3, 1).detach().numpy()
    first_image = (pixels[0] * 255).round().astype("uint8")
    return Image.fromarray(first_image)
31
+
32
+
33
@torch.no_grad()
def get_latent_from_image(vae, image, device="cuda"):
    """Encode an image into a latent sampled from ``vae``'s latent distribution.

    Args:
        vae: encoder exposing ``encode(...)`` whose result has
            ``latent_dist.sample(generator=...)``.
        image: anything ``np.array`` turns into an ``(h, w, c)`` array of
            0-255 values. A 4th channel is treated as alpha and composited
            over white, because the diffusion model does not support alpha.
        device: device the pixel tensor is moved to before encoding.

    Returns:
        The sampled latent multiplied by 0.18215.
    """
    # torch.cuda.manual_seed seeds the global CUDA RNG in place and returns
    # None, so no generator object exists to hand to ``sample``; sampling is
    # reproducible through the global seed only.
    torch.cuda.manual_seed(798122)
    generator = None
    # Resize and transpose for numpy b h w c -> torch b c h w
    # image = image.resize((width, height), resample=Image.Resampling.LANCZOS)
    pixels = np.array(image).astype(np.float16) / 255.0
    if pixels.shape[-1] > 3:
        # Composite over white while values are still in [0, 1]; doing this
        # after rescaling to [-1, 1] would treat alpha 0 as -1 and corrupt
        # every non-opaque pixel.
        alpha = pixels[..., 3:]
        pixels = pixels[..., :3] * alpha + (1 - alpha)
    pixels = pixels * 2.0 - 1.0
    tensor = torch.from_numpy(pixels[np.newaxis, ...].transpose(0, 3, 1, 2))
    # Move image to the requested device
    tensor = tensor.to(device)
    # Encode image
    init_latent = vae.encode(tensor).latent_dist.sample(generator=generator) * 0.18215
    return init_latent
48
+
49
+
50
def load_all_models(model_path_diffusion):
    """Load tokenizer, text encoder, UNet and VAE from one model directory/repo.

    Args:
        model_path_diffusion: path or identifier handed to every
            ``from_pretrained`` call; the ``tokenizer``, ``text_encoder``,
            ``unet`` and ``vae`` subfolders are read from it.

    Returns:
        dict with keys ``"unet"``, ``"vae"``, ``"clip_tokenizer"`` and
        ``"clip_text_model"``. The three models are loaded as float16 and
        moved to ``"cuda"``.
    """
    # Replace this with huggingface auth token as a string if model is not already downloaded
    auth_token = True
    device = "cuda"
    # Options shared by the two diffusion networks.
    half_precision_options = dict(
        use_auth_token=auth_token,
        revision="fp16",
        torch_dtype=torch.float16,
    )

    clip_tokenizer = CLIPTokenizer.from_pretrained(
        model_path_diffusion, subfolder="tokenizer"
    )
    clip_text_model = CLIPTextModel.from_pretrained(
        model_path_diffusion, subfolder="text_encoder", torch_dtype=torch.float16
    )
    unet = UNet2DConditionModel.from_pretrained(
        model_path_diffusion, subfolder="unet", **half_precision_options
    )
    vae = AutoencoderKL.from_pretrained(
        model_path_diffusion, subfolder="vae", **half_precision_options
    )

    for network in (unet, vae, clip_text_model):
        network.to(device)

    return {
        "unet": unet,
        "vae": vae,
        "clip_tokenizer": clip_tokenizer,
        "clip_text_model": clip_text_model,
    }
87
+
88
+
89
@torch.no_grad()
def check_clip_score(clip_model, clip_processor, prompts=[], images=[]):
    """Score prompts against images with CLIP features.

    The image/text feature matrices are multiplied into an
    ``(images, prompts)`` similarity matrix and normalised with a softmax:
    over prompts (``dim=1``) when there is exactly one image, otherwise over
    images (``dim=0``) when there is exactly one prompt.

    Args:
        clip_model: model exposing ``dtype``, ``device``,
            ``get_image_features`` and ``get_text_features``.
        clip_processor: callable returning a mapping with ``"pixel_values"``
            and ``"input_ids"``.
        prompts: list of prompts.
        images: list of images.

    Returns:
        torch.Tensor: the softmax-normalised similarity matrix.

    Raises:
        ValueError: if neither ``prompts`` nor ``images`` has exactly one
            element, since the softmax axis is then undefined.
    """
    if len(images) == 1:
        dim = 1
    elif len(prompts) == 1:
        dim = 0
    else:
        # Previously this case failed later with an unbound-variable error.
        raise ValueError(
            "check_clip_score needs exactly one prompt or exactly one image "
            "to choose the softmax axis"
        )
    inputs = clip_processor(
        text=prompts, images=images, return_tensors="pt", padding=True
    )
    # as_tensor moves/casts without the extra copy (and warning) of torch.tensor(tensor).
    pixel_values = torch.as_tensor(
        inputs["pixel_values"], dtype=clip_model.dtype, device=clip_model.device
    )
    input_ids = torch.as_tensor(inputs["input_ids"], device=clip_model.device)
    # The full joint forward pass of the original was never used, so it is skipped.
    a = clip_model.get_image_features(pixel_values)
    b = clip_model.get_text_features(input_ids)
    return torch.matmul(a, b.t()).softmax(dim=dim)
110
+
111
+
112
def get_attn(unet, use=True):
    """Collect recorded cross-attention maps and return them resized to 64x64.

    Only modules whose class name is ``"CrossAttention"``, whose module name
    contains ``"attn2"`` and whose stored ``attn`` tensor has shape
    ``[8, 1024, 77]`` contribute. Their maps are concatenated and summed over
    the leading dimension, then each of the 77 token maps is resized.

    Args:
        unet: model whose ``named_modules()`` are scanned.
        use: unused; kept for backward compatibility.

    Returns:
        numpy.ndarray of shape ``(64, 64, 77)``.

    Raises:
        ValueError: if no module holds a matching attention map (for example
            when no forward pass has been recorded yet).
    """
    expected_shape = torch.Size([8, 1024, 77])
    attn = []
    for name, module in unet.named_modules():
        if type(module).__name__ != "CrossAttention" or "attn2" not in name:
            continue
        # ``attn`` is None until the patched attention has run once, and is
        # missing entirely on unpatched modules; skip both instead of crashing.
        recorded = getattr(module, "attn", None)
        if recorded is not None and recorded.size() == expected_shape:
            attn.append(recorded)
    if not attn:
        raise ValueError(
            "no CrossAttention 'attn2' module holds an attention map of shape "
            "[8, 1024, 77]"
        )
    attn = torch.cat(attn, dim=0)
    attn = torch.sum(attn, dim=0)
    # Side length of the square attention map; identical for every token.
    dim = int(np.sqrt(attn.shape[0]))
    resized = torch.zeros([64, 64, 77])
    f = torchvision.transforms.Resize(size=(64, 64))
    for i in range(77):
        attn_slice = attn[..., i].view(1, dim, dim)
        resized[..., i] = f(attn_slice)[0]
    return resized.cpu().numpy()
128
+
129
+
130
def save_attn(unet):
    """Dump the stored attention of every ``attn2`` CrossAttention module to ``/tmp``.

    One ``<module name>.pt`` file is written per matching module via
    ``torch.save``; each written path is printed.

    Args:
        unet: model whose ``named_modules()`` are scanned.
    """
    folder = "/tmp"
    for name, module in unet.named_modules():
        is_cross_attention = type(module).__name__ == "CrossAttention"
        if not (is_cross_attention and "attn2" in name):
            continue
        filepath = os.path.join(folder, name + ".pt")
        torch.save(module.attn, filepath)
        print(filepath)
138
+
139
+
140
def use_add_noise(unet, level, use=True):
    """Set the noise flag and noise level on every CrossAttention module.

    Args:
        unet: model whose ``named_modules()`` are scanned.
        level: value stored in each module's ``noise_level``.
        use: value stored in each module's ``use_add_noise``.
    """
    cross_attention_modules = (
        module
        for _, module in unet.named_modules()
        if type(module).__name__ == "CrossAttention"
    )
    for module in cross_attention_modules:
        module.use_add_noise = use
        module.noise_level = level
146
+
147
+
148
def use_edited_attention(unet, use=True):
    """Set the ``use_edited_attn`` flag on every CrossAttention module.

    Args:
        unet: model whose ``named_modules()`` are scanned.
        use: value stored in each module's ``use_edited_attn``.
    """
    for _, module in unet.named_modules():
        if type(module).__name__ != "CrossAttention":
            continue
        module.use_edited_attn = use
153
+
154
+
155
def prompt_token(prompt, index, clip_tokenizer=None):
    """Return the decoded text of the token at position ``index`` of ``prompt``.

    Args:
        prompt: text to tokenize.
        index: position in the padded token sequence.
        clip_tokenizer: tokenizer to use. When omitted, a module-level
            ``clip_tokenizer`` is looked up, which is what the original code
            relied on although this module never defines one.

    Returns:
        The tokenizer's decoding of the single token at ``index``.

    Raises:
        NameError: if no tokenizer is passed and none exists at module level.
    """
    if clip_tokenizer is None:
        clip_tokenizer = globals().get("clip_tokenizer")
    if clip_tokenizer is None:
        raise NameError(
            "prompt_token needs a tokenizer: pass clip_tokenizer=... "
            "(no module-level 'clip_tokenizer' is defined)"
        )
    tokens = clip_tokenizer(
        prompt,
        padding="max_length",
        max_length=clip_tokenizer.model_max_length,
        truncation=True,
        return_tensors="pt",
        return_overflowing_tokens=True,
    ).input_ids[0]
    return clip_tokenizer.decode(tokens[index : index + 1])
DirectedDiffusion/Diffusion.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import random
4
+ import numpy as np
5
+ import datetime
6
+
7
+ from PIL import Image
8
+ from diffusers import LMSDiscreteScheduler
9
+ from tqdm.auto import tqdm
10
+ from torch import autocast
11
+ from difflib import SequenceMatcher
12
+
13
+ import DirectedDiffusion
14
+
15
+
16
@torch.no_grad()
def stablediffusion(
    model_bundle,
    attn_editor_bundle={},
    device="cuda",
    prompt="",
    steps=50,
    seed=None,
    width=512,
    height=512,
    t_start=0,
    guidance_scale=7.5,
    init_latents=None,
    is_save_attn=False,
    is_save_recons=False,
    folder = "./",
):
    """Run classifier-free-guided diffusion, optionally with region-directed attention editing.

    Args:
        model_bundle: dict with keys ``"unet"``, ``"vae"``,
            ``"clip_tokenizer"`` and ``"clip_text_model"``.
        attn_editor_bundle: dict of editing options. ``"num_affected_steps"``
            (falsy disables editing), ``"edit_index"``, ``"roi"``,
            ``"noise_scale"`` and ``"num_trailing_attn"`` are read. Never mutated.
        device: device for latents, timesteps and token ids; also passed to ``autocast``.
        prompt: conditioning text.
        steps: number of scheduler inference steps.
        seed: seed for the global CUDA RNG; drawn at random when ``None``.
        width: image width, rounded down to a multiple of 64.
        height: image height, rounded down to a multiple of 64.
        t_start: index of the first scheduler timestep to run.
        guidance_scale: weight of the conditional/unconditional difference.
        init_latents: currently unused.
        is_save_attn: save an attention plot after every step.
        is_save_recons: save the decoded latent after every step.
        folder: parent directory of the timestamped ``*_internal`` output folder.

    Returns:
        PIL image decoded from the final latent.
    """
    # neural networks
    unet = model_bundle["unet"]
    vae = model_bundle["vae"]
    clip_tokenizer = model_bundle["clip_tokenizer"]
    clip = model_bundle["clip_text_model"]
    # attn editor bundle, our stuff
    num_affected_steps = int(attn_editor_bundle.get("num_affected_steps") or 0)
    if not num_affected_steps:
        print("Not using attn editor")
    else:
        print("Using attn editor")
    # The attention patch is installed in both cases; it stays inert until the
    # per-module flags are switched on in the loop below.
    DirectedDiffusion.AttnCore.init_attention_edit(
        unet,
        tokens=attn_editor_bundle.get("edit_index") or [],
        rios=attn_editor_bundle.get("roi") or [],
        noise_scale=attn_editor_bundle.get("noise_scale") or [],
        # Whitespace-separated word count -- presumably a stand-in for the
        # prompt's token count; confirm against the tokenizer.
        length_prompt=len(prompt.split(" ")),
        num_trailing_attn=attn_editor_bundle.get("num_trailing_attn") or [],
    )

    # Change size to multiple of 64 to prevent size mismatches inside model
    width = width - width % 64
    height = height - height % 64
    # If seed is None, randomly select seed from 0 to 2^32-1
    if seed is None:
        seed = random.randrange(2 ** 32 - 1)
    # torch.cuda.manual_seed seeds the global CUDA RNG and returns None, so
    # there is no generator object to pass on; torch.randn below draws from
    # the globally seeded RNG.
    torch.cuda.manual_seed(seed)
    # Set inference timesteps to scheduler
    scheduler = LMSDiscreteScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        num_train_timesteps=1000,
    )
    scheduler.set_timesteps(steps)
    # Honour ``device`` instead of always moving to CUDA.
    scheduler.timesteps = scheduler.timesteps.half().to(device)

    # Second, short scheduler whose timesteps serve only as per-step noise levels.
    noise_weight = LMSDiscreteScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        num_train_timesteps=10,
    )
    noise_weight.set_timesteps(num_affected_steps)

    init_latent = torch.zeros(
        (1, unet.in_channels, height // 8, width // 8), device=device
    )
    # Generate random normal noise
    noise = torch.randn(init_latent.shape, device=device)
    latent = scheduler.add_noise(
        init_latent,
        noise,
        torch.tensor(
            [scheduler.timesteps[t_start]], device=device, dtype=torch.float16
        ),
    ).to(device)

    current_time = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    folder = os.path.join(folder, current_time + "_internal")
    if is_save_attn or is_save_recons:
        os.makedirs(folder, exist_ok=True)

    # Process clip
    with autocast(device):
        embeds_uncond = DirectedDiffusion.AttnEditorUtils.get_embeds(
            "", clip, clip_tokenizer, device=device
        )
        embeds_cond = DirectedDiffusion.AttnEditorUtils.get_embeds(
            prompt, clip, clip_tokenizer, device=device
        )
        timesteps = scheduler.timesteps[t_start:]
        for i, t in tqdm(enumerate(timesteps), total=len(timesteps)):
            latent_model_input = scheduler.scale_model_input(latent, t).half()
            noise_pred_uncond = unet(
                latent_model_input, t, encoder_hidden_states=embeds_uncond
            ).sample

            # Arm the one-shot editing flags only for the first affected steps
            # and only for the conditional pass.
            if i < num_affected_steps:
                DirectedDiffusion.AttnEditorUtils.use_add_noise(
                    unet, noise_weight.timesteps[i]
                )
                DirectedDiffusion.AttnEditorUtils.use_edited_attention(unet)
            noise_pred_cond = unet(
                latent_model_input, t, encoder_hidden_states=embeds_cond
            ).sample

            # Perform guidance
            delta = noise_pred_cond - noise_pred_uncond
            noise_pred = noise_pred_uncond + guidance_scale * delta
            latent = scheduler.step(noise_pred, t, latent).prev_sample

            if is_save_attn:
                filepath = os.path.join(folder, "ca.{:04d}.jpg".format(i))
                DirectedDiffusion.Plotter.plot_activation(filepath, unet, prompt, clip_tokenizer)
            if is_save_recons:
                filepath = os.path.join(folder, "recons.{:04d}.jpg".format(i))
                recons = DirectedDiffusion.AttnEditorUtils.get_image_from_latent(vae, latent)
                recons.save(filepath)
    return DirectedDiffusion.AttnEditorUtils.get_image_from_latent(vae, latent)
DirectedDiffusion/Plotter.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ """
3
+
4
+ import matplotlib.pyplot as plt
5
+ import numpy as np
6
+ import torchvision
7
+
8
+ import DirectedDiffusion
9
+
10
# Enlarge matplotlib's default figure size by a factor of 1.5 for every figure
# created after this module is imported.
# NOTE(review): this runs at import time, and DirectedDiffusion/__init__.py
# reloads this module right after importing it, so the factor appears to be
# applied twice per process -- confirm that is intended.
plt.rcParams["figure.figsize"] = [float(v)*1.5 for v in plt.rcParams["figure.figsize"]]
11
+
12
def plot_activation(filepath, unet, prompt, clip_tokenizer):
    """Save a picture of the attention maps of the prompt's words.

    The maps at token positions ``1 .. n`` (``n`` = number of space-separated
    words in ``prompt``) are each normalised by their own maximum and stacked
    vertically into one image.

    Args:
        filepath: destination passed to ``savefig``.
        unet: model handed to ``AttnEditorUtils.get_attn``.
        prompt: prompt text; used for the word count and the figure title.
        clip_tokenizer: unused; kept for interface compatibility.
    """
    a = DirectedDiffusion.AttnEditorUtils.get_attn(unet)
    n = len(prompt.split(" "))
    normalized_maps = []
    for i in range(1, n + 1):  # position 0 is skipped
        token_map = a[..., i]
        # The small constant avoids dividing by zero on an all-zero map.
        normalized_maps.append(token_map / (token_map.max() + 0.001))
    # Equivalent to the original hstack-of-transposes followed by a transpose.
    arrs = np.vstack(normalized_maps)

    # Use a dedicated figure and close it afterwards: drawing on the implicit
    # global figure piles up one more image per call and never frees it.
    fig, ax = plt.subplots()
    ax.imshow(arrs, cmap='jet', vmin=0, vmax=.8)
    ax.set_title(prompt)
    fig.savefig(filepath)
    plt.close(fig)
DirectedDiffusion/ProgramInfo.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import textwrap
2
+
3
+
4
def get_parser_description():
    """Return the long-form help text shown by the command-line editor.

    The text is dedented with ``textwrap.dedent`` and covers single-region,
    multi-region, grid-search and lazy-search example invocations.
    """
    return textwrap.dedent(
        """\
        ***Stable Diffusion Local Editor***

        ** Examples (Single run, single region):

        # To locate the car at top right of the image
        python ./bin/SdEditorCmd.py -roi "0.5,1.0,0.0,0.5" -ei "1,2,3" -nt "10" -s "2.0" -ns 15 -p "A yellow car on a bridge" -m

        ** Example (Single run, multiple regions)

        The following argument flags are part of regioning strategy controlling the effects of specific region during attention editing step.
        Their length must be the same, otherwise the program will be terminated.
        --num-trailing-attn (-nt) # a string of integers
        --noise-scale (-s) # a string of floats
        --edit-index (-ei) # multiple strings of integers
        --region-of-interest (-roi) # multiple strings of floats

        # Two region case
        python ./bin/SdEditorCmd.py -roi "0.4,0.7,0.1,0.5" "0.4,0.7,0.5,0.9" -ei "2,3" "8,9" -nt "30,30" -ns 10 -s "1.0,1.0" -p "A red cube on top of a blue sphere" -m -sd 2483964026830

        ** Examples (Grid Search):

        The following arguments are part of the grid search method to speed up the experimental efficiency:
        --num-trailing-attn (-nt)
        --noise-scale(-s)
        --num-affected-steps(-ns)
        --diffusion-steps (-ds)

        # The following command will run four times with varied options of -nt and -ns
        python ./bin/SdEditorCmd.py -roi "0.5,1.0,0.0,0.5" -ei "1,2,3" -nt 5 10 20 -ns 5 10 -s 2.5 -p "A yellow car running on a bridge" -m

        ** Others

        Using -m flag will draw the metadata on the saved image for quick reference.
        Using -is flag will show the final result after each diffusion run


        ** Lazy search

        We offer a lazy grid search command at the initial experiment stage, for instance

        # for large number of parameters
        python ./bin/SdEditorCmd.py -roi "0.4,0.7,0.1,0.5" "0.4,0.7,0.5,0.9" -ei "2,3" "8,9" -p "A red cube on top of a blue sphere" -l1

        # relatively smaller number of parameters
        python ./bin/SdEditorCmd.py -roi "0.4,0.7,0.1,0.5" "0.4,0.7,0.5,0.9" -ei "2,3" "8,9" -p "A red cube on top of a blue sphere" -l2

        This also contains -m function

        See more examples under scripts/sdeditor-example.sh
        """
    )
DirectedDiffusion/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Import every submodule so that ``import DirectedDiffusion`` exposes them as
# attributes (e.g. ``DirectedDiffusion.AttnCore``).
from . import Diffusion
from . import AttnCore
from . import AttnEditorUtils
from . import Plotter
from . import ProgramInfo
import importlib
# Re-execute each submodule immediately after importing it.
# NOTE(review): presumably a convenience for picking up code changes during
# interactive development; it also means every submodule's top-level code runs
# twice on first import -- confirm this is still wanted.
importlib.reload(Diffusion)
importlib.reload(AttnCore)
importlib.reload(AttnEditorUtils)
importlib.reload(Plotter)
importlib.reload(ProgramInfo)
app.py CHANGED
@@ -1,7 +1,9 @@
 
1
  import gradio as gr
2
 
3
  def greet(name):
4
- return "Hello " + name + "!!"
 
5
 
6
  iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
  iface.launch()
1
+ import DirectedDiffusion
2
  import gradio as gr
3
 
4
def greet(name):
    """Return a greeting for ``name`` followed by the string form of the DirectedDiffusion module."""
    return "Hello " + name + "!!" + str(DirectedDiffusion)
7
 
8
  iface = gr.Interface(fn=greet, inputs="text", outputs="text")
9
  iface.launch()