Spaces: Running on Zero
Attempt to integrate negative prompts
app.py CHANGED
@@ -157,7 +157,6 @@ def retrieve_timesteps(
     timesteps = scheduler.timesteps
     return timesteps, num_inference_steps
 
-# FLUX pipeline
 @torch.inference_mode()
 def flux_pipe_call_that_returns_an_iterable_of_images(
     self,
@@ -180,9 +179,11 @@ def flux_pipe_call_that_returns_an_iterable_of_images(
     max_sequence_length: int = 512,
     good_vae: Optional[Any] = None,
 ):
+    # Set default height and width
     height = height or self.default_sample_size * self.vae_scale_factor
     width = width or self.default_sample_size * self.vae_scale_factor
 
+    # Validate inputs
     self.check_inputs(
         prompt,
         prompt_2,
@@ -201,7 +202,9 @@ def flux_pipe_call_that_returns_an_iterable_of_images(
     device = self._execution_device
 
     lora_scale = joint_attention_kwargs.get("scale", None) if joint_attention_kwargs is not None else None
-    prompt_embeds, pooled_prompt_embeds, text_ids = self.encode_prompt(
+
+    # Encode the positive prompt
+    prompt_embeds_pos, pooled_prompt_embeds_pos, text_ids_pos = self.encode_prompt(
         prompt=prompt,
         prompt_2=prompt_2,
         prompt_embeds=prompt_embeds,
@@ -212,18 +215,38 @@ def flux_pipe_call_that_returns_an_iterable_of_images(
         lora_scale=lora_scale,
     )
 
+    # Encode the negative prompt if provided
+    if negative_prompt is not None:
+        prompt_embeds_neg, pooled_prompt_embeds_neg, text_ids_neg = self.encode_prompt(
+            prompt=negative_prompt,
+            prompt_2=None, # Assuming no secondary prompt for negative
+            prompt_embeds=None,
+            pooled_prompt_embeds=None,
+            device=device,
+            num_images_per_prompt=num_images_per_prompt,
+            max_sequence_length=max_sequence_length,
+            lora_scale=lora_scale,
+        )
+    else:
+        # Fallback to positive embeddings if no negative prompt is provided
+        prompt_embeds_neg = prompt_embeds_pos
+        pooled_prompt_embeds_neg = pooled_prompt_embeds_pos
+        text_ids_neg = text_ids_pos
+
+    # Prepare latents
     num_channels_latents = self.transformer.config.in_channels // 4
     latents, latent_image_ids = self.prepare_latents(
         batch_size * num_images_per_prompt,
         num_channels_latents,
         height,
         width,
-        prompt_embeds.dtype,
+        prompt_embeds_pos.dtype,
         device,
         generator,
         latents,
     )
 
+    # Set up timesteps
     sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
     image_seq_len = latents.shape[1]
     mu = calculate_shift(
@@ -243,41 +266,66 @@ def flux_pipe_call_that_returns_an_iterable_of_images
     )
     self._num_timesteps = len(timesteps)
 
-    guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32).expand(latents.shape[0]) if self.transformer.config.guidance_embeds else None
+    guidance = (
+        torch.full([1], guidance_scale, device=device, dtype=torch.float32).expand(latents.shape[0])
+        if self.transformer.config.guidance_embeds
+        else None
+    )
 
+    # Denoising loop
     for i, t in enumerate(timesteps):
-        if self.interrupt:
+        if self._interrupt:
             continue
 
         timestep = t.expand(latents.shape[0]).to(latents.dtype)
         print(f"Step {i + 1}/{num_inference_steps} - Timestep: {timestep.item()}\n")
 
-        noise_pred = self.transformer(
+        # Compute noise prediction for positive prompt
+        noise_pred_pos = self.transformer(
+            hidden_states=latents,
+            timestep=timestep / 1000,
+            guidance=guidance,
+            pooled_projections=pooled_prompt_embeds_pos,
+            encoder_hidden_states=prompt_embeds_pos,
+            txt_ids=text_ids_pos,
+            img_ids=latent_image_ids,
+            joint_attention_kwargs=self.joint_attention_kwargs,
+            return_dict=False,
+        )[0]
+
+        # Compute noise prediction for negative prompt
+        noise_pred_neg = self.transformer(
             hidden_states=latents,
             timestep=timestep / 1000,
             guidance=guidance,
-            pooled_projections=pooled_prompt_embeds,
-            encoder_hidden_states=prompt_embeds,
-            txt_ids=text_ids,
+            pooled_projections=pooled_prompt_embeds_neg,
+            encoder_hidden_states=prompt_embeds_neg,
+            txt_ids=text_ids_neg,
            img_ids=latent_image_ids,
            joint_attention_kwargs=self.joint_attention_kwargs,
            return_dict=False,
        )[0]
 
+        # Combine noise predictions using guidance scale
+        noise_pred = noise_pred_neg + guidance_scale * (noise_pred_pos - noise_pred_neg)
+
+        # Generate intermediate image
         latents_for_image = self._unpack_latents(latents, height, width, self.vae_scale_factor)
         latents_for_image = (latents_for_image / self.vae.config.scaling_factor) + self.vae.config.shift_factor
         image = self.vae.decode(latents_for_image, return_dict=False)[0]
         yield self.image_processor.postprocess(image, output_type=output_type)[0]
+
+        # Update latents with combined noise prediction
         latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
         torch.cuda.empty_cache()
 
+    # Final image generation
     latents = self._unpack_latents(latents, height, width, self.vae_scale_factor)
     latents = (latents / good_vae.config.scaling_factor) + good_vae.config.shift_factor
     image = good_vae.decode(latents, return_dict=False)[0]
     self.maybe_free_model_hooks()
     torch.cuda.empty_cache()
     yield self.image_processor.postprocess(image, output_type=output_type)[0]
-
 #--------------------------------------------------Model Initialization-----------------------------------------------------------------------------------------#
 
 dtype = torch.bfloat16
@@ -343,7 +391,7 @@ def update_selection(evt: gr.SelectData, width, height, aspect_ratio):
     )
 
 @spaces.GPU(duration=120,progress=gr.Progress(track_tqdm=True))
-def generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, progress):
+def generate_image(prompt_mash, negative_prompt, steps, seed, cfg_scale, width, height, lora_scale, progress):
     pipe.to("cuda")
     generator = torch.Generator(device="cuda").manual_seed(seed)
     flash_attention_enabled = torch.backends.cuda.flash_sdp_enabled()
@@ -384,7 +432,7 @@ def generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scal
     ):
         yield img
 
-def generate_image_to_image(prompt_mash, image_input_path, image_strength, steps, cfg_scale, width, height, lora_scale, seed, progress):
+def generate_image_to_image(prompt_mash, negative_prompt, image_input_path, image_strength, steps, cfg_scale, width, height, lora_scale, seed, progress):
    generator = torch.Generator(device="cuda").manual_seed(seed)
    pipe_i2i.to("cuda")
    flash_attention_enabled = torch.backends.cuda.flash_sdp_enabled()
@@ -447,7 +495,7 @@ def run_lora(prompt, map_option, image_input, image_strength, cfg_scale, steps,
         print(f"Conditioned Image: {image_input.size}.. converted to RGB and resized\n")
     if map_option != "Prompt":
         prompt = PROMPTS[map_option]
-
+        negative_prompt = NEGATIVE_PROMPTS.get(map_option, "")
 
     selected_lora = loras[selected_index]
     lora_path = selected_lora["repo"]
@@ -484,7 +532,7 @@ def run_lora(prompt, map_option, image_input, image_strength, cfg_scale, steps,
 
     if(image_input is not None):
         print(f"\nGenerating image to image with seed: {seed}\n")
-        generated_image = generate_image_to_image(prompt_mash, image_input, image_strength, steps, cfg_scale, width, height, lora_scale, seed, progress)
+        generated_image = generate_image_to_image(prompt_mash, negative_prompt, image_input, image_strength, steps, cfg_scale, width, height, lora_scale, seed, progress)
 
         if enlarge:
             upscaled_image = upscale_image(generated_image, max(1.0,min((TARGET_SIZE[0]/width),(TARGET_SIZE[1]/height))))
@@ -498,7 +546,7 @@ def run_lora(prompt, map_option, image_input, image_strength, cfg_scale, steps,
             final_image = tmp_upscaled.name
         yield final_image, seed, gr.update(visible=False)
     else:
-        image_generator = generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, progress)
+        image_generator = generate_image(prompt_mash, negative_prompt, steps, seed, cfg_scale, width, height, lora_scale, progress)
 
         final_image = None
         step_counter = 0
@@ -816,7 +864,7 @@ with gr.Blocks(css_paths="style_20250314.css", title=title, theme='Surn/beeuty',
                 label="Prompt",
                 visible=False,
                 elem_classes="solid",
-                value="
+                value="Planetary overhead view, directly from above, centered on the planet’s surface, (rectangular tabletop_map) alien planet map, Battletech_boardgame scifi world with forests, lakes, oceans, continents and snow at the top and bottom, (middle is dark, no_reflections, no_shadows), looking straight down.",
                 lines=4
             )
             negative_prompt_textbox = gr.Textbox(
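For reference, the per-step blend this commit adds is ordinary classifier-free guidance: the negative-prompt prediction is the baseline, and the positive-prompt prediction is amplified relative to it by guidance_scale. A minimal, self-contained sketch with toy tensors (values and shapes are illustrative only, not the pipeline's real latents):

import torch

# Stand-ins for the two per-step transformer outputs in the diff
# (noise_pred_pos / noise_pred_neg); the numbers are arbitrary.
noise_pred_pos = torch.tensor([0.8, -0.2, 0.5])
noise_pred_neg = torch.tensor([0.3, 0.1, 0.4])
guidance_scale = 3.5

# Same combination rule the denoising loop above applies each step:
# start from the negative prediction and push toward the positive one.
noise_pred = noise_pred_neg + guidance_scale * (noise_pred_pos - noise_pred_neg)
print(noise_pred)  # -> tensor([2.0500, -0.9500, 0.7500])

Note that with this change every denoising step runs the transformer twice (once per prompt), roughly doubling per-step cost, and guidance_scale is used both for the embedded guidance tensor and for the explicit mix shown here.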