Spaces: Running on Zero
Attempt to integrate negative prompts
app.py CHANGED
@@ -157,7 +157,6 @@ def retrieve_timesteps(
     timesteps = scheduler.timesteps
     return timesteps, num_inference_steps
 
-# FLUX pipeline
 @torch.inference_mode()
 def flux_pipe_call_that_returns_an_iterable_of_images(
     self,
@@ -180,9 +179,11 @@ def flux_pipe_call_that_returns_an_iterable_of_images(
     max_sequence_length: int = 512,
     good_vae: Optional[Any] = None,
 ):
+    # Set default height and width
     height = height or self.default_sample_size * self.vae_scale_factor
     width = width or self.default_sample_size * self.vae_scale_factor
 
+    # Validate inputs
     self.check_inputs(
         prompt,
         prompt_2,
@@ -201,7 +202,9 @@ def flux_pipe_call_that_returns_an_iterable_of_images(
     device = self._execution_device
 
     lora_scale = joint_attention_kwargs.get("scale", None) if joint_attention_kwargs is not None else None
-    prompt_embeds, pooled_prompt_embeds, text_ids = self.encode_prompt(
+
+    # Encode the positive prompt
+    prompt_embeds_pos, pooled_prompt_embeds_pos, text_ids_pos = self.encode_prompt(
         prompt=prompt,
         prompt_2=prompt_2,
         prompt_embeds=prompt_embeds,
@@ -212,18 +215,38 @@ def flux_pipe_call_that_returns_an_iterable_of_images(
         lora_scale=lora_scale,
     )
 
+    # Encode the negative prompt if provided
+    if negative_prompt is not None:
+        prompt_embeds_neg, pooled_prompt_embeds_neg, text_ids_neg = self.encode_prompt(
+            prompt=negative_prompt,
+            prompt_2=None, # Assuming no secondary prompt for negative
+            prompt_embeds=None,
+            pooled_prompt_embeds=None,
+            device=device,
+            num_images_per_prompt=num_images_per_prompt,
+            max_sequence_length=max_sequence_length,
+            lora_scale=lora_scale,
+        )
+    else:
+        # Fallback to positive embeddings if no negative prompt is provided
+        prompt_embeds_neg = prompt_embeds_pos
+        pooled_prompt_embeds_neg = pooled_prompt_embeds_pos
+        text_ids_neg = text_ids_pos
+
+    # Prepare latents
     num_channels_latents = self.transformer.config.in_channels // 4
     latents, latent_image_ids = self.prepare_latents(
         batch_size * num_images_per_prompt,
         num_channels_latents,
         height,
         width,
-        prompt_embeds.dtype,
+        prompt_embeds_pos.dtype,
         device,
         generator,
         latents,
     )
 
+    # Set up timesteps
     sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
     image_seq_len = latents.shape[1]
     mu = calculate_shift(
@@ -243,41 +266,66 @@ def flux_pipe_call_that_returns_an_iterable_of_images
     )
     self._num_timesteps = len(timesteps)
 
-    guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float32).expand(latents.shape[0]) if self.transformer.config.guidance_embeds else None
+    guidance = (
+        torch.full([1], guidance_scale, device=device, dtype=torch.float32).expand(latents.shape[0])
+        if self.transformer.config.guidance_embeds
+        else None
+    )
 
+    # Denoising loop
     for i, t in enumerate(timesteps):
-        if self.interrupt:
+        if self._interrupt:
             continue
 
         timestep = t.expand(latents.shape[0]).to(latents.dtype)
         print(f"Step {i + 1}/{num_inference_steps} - Timestep: {timestep.item()}\n")
 
-        noise_pred = self.transformer(
+        # Compute noise prediction for positive prompt
+        noise_pred_pos = self.transformer(
+            hidden_states=latents,
+            timestep=timestep / 1000,
+            guidance=guidance,
+            pooled_projections=pooled_prompt_embeds_pos,
+            encoder_hidden_states=prompt_embeds_pos,
+            txt_ids=text_ids_pos,
+            img_ids=latent_image_ids,
+            joint_attention_kwargs=self.joint_attention_kwargs,
+            return_dict=False,
+        )[0]
+
+        # Compute noise prediction for negative prompt
+        noise_pred_neg = self.transformer(
             hidden_states=latents,
             timestep=timestep / 1000,
             guidance=guidance,
-            pooled_projections=pooled_prompt_embeds,
-            encoder_hidden_states=prompt_embeds,
-            txt_ids=text_ids,
+            pooled_projections=pooled_prompt_embeds_neg,
+            encoder_hidden_states=prompt_embeds_neg,
+            txt_ids=text_ids_neg,
            img_ids=latent_image_ids,
            joint_attention_kwargs=self.joint_attention_kwargs,
            return_dict=False,
        )[0]
 
+        # Combine noise predictions using guidance scale
+        noise_pred = noise_pred_neg + guidance_scale * (noise_pred_pos - noise_pred_neg)
+
+        # Generate intermediate image
         latents_for_image = self._unpack_latents(latents, height, width, self.vae_scale_factor)
         latents_for_image = (latents_for_image / self.vae.config.scaling_factor) + self.vae.config.shift_factor
         image = self.vae.decode(latents_for_image, return_dict=False)[0]
         yield self.image_processor.postprocess(image, output_type=output_type)[0]
+
+        # Update latents with combined noise prediction
         latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
         torch.cuda.empty_cache()
 
+    # Final image generation
     latents = self._unpack_latents(latents, height, width, self.vae_scale_factor)
     latents = (latents / good_vae.config.scaling_factor) + good_vae.config.shift_factor
     image = good_vae.decode(latents, return_dict=False)[0]
     self.maybe_free_model_hooks()
     torch.cuda.empty_cache()
     yield self.image_processor.postprocess(image, output_type=output_type)[0]
-
 #--------------------------------------------------Model Initialization-----------------------------------------------------------------------------------------#
 
 dtype = torch.bfloat16
@@ -343,7 +391,7 @@ def update_selection(evt: gr.SelectData, width, height, aspect_ratio):
     )
 
 @spaces.GPU(duration=120,progress=gr.Progress(track_tqdm=True))
-def generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, progress):
+def generate_image(prompt_mash, negative_prompt, steps, seed, cfg_scale, width, height, lora_scale, progress):
     pipe.to("cuda")
     generator = torch.Generator(device="cuda").manual_seed(seed)
     flash_attention_enabled = torch.backends.cuda.flash_sdp_enabled()
@@ -384,7 +432,7 @@ def generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scal
     ):
         yield img
 
-def generate_image_to_image(prompt_mash, image_input_path, image_strength, steps, cfg_scale, width, height, lora_scale, seed, progress):
+def generate_image_to_image(prompt_mash, negative_prompt, image_input_path, image_strength, steps, cfg_scale, width, height, lora_scale, seed, progress):
    generator = torch.Generator(device="cuda").manual_seed(seed)
    pipe_i2i.to("cuda")
    flash_attention_enabled = torch.backends.cuda.flash_sdp_enabled()
@@ -447,7 +495,7 @@ def run_lora(prompt, map_option, image_input, image_strength, cfg_scale, steps,
         print(f"Conditioned Image: {image_input.size}.. converted to RGB and resized\n")
     if map_option != "Prompt":
         prompt = PROMPTS[map_option]
-
+        negative_prompt = NEGATIVE_PROMPTS.get(map_option, "")
 
     selected_lora = loras[selected_index]
     lora_path = selected_lora["repo"]
@@ -484,7 +532,7 @@ def run_lora(prompt, map_option, image_input, image_strength, cfg_scale, steps,
 
     if(image_input is not None):
         print(f"\nGenerating image to image with seed: {seed}\n")
-        generated_image = generate_image_to_image(prompt_mash, image_input, image_strength, steps, cfg_scale, width, height, lora_scale, seed, progress)
+        generated_image = generate_image_to_image(prompt_mash, negative_prompt, image_input, image_strength, steps, cfg_scale, width, height, lora_scale, seed, progress)
 
         if enlarge:
             upscaled_image = upscale_image(generated_image, max(1.0,min((TARGET_SIZE[0]/width),(TARGET_SIZE[1]/height))))
@@ -498,7 +546,7 @@ def run_lora(prompt, map_option, image_input, image_strength, cfg_scale, steps,
             final_image = tmp_upscaled.name
         yield final_image, seed, gr.update(visible=False)
     else:
-        image_generator = generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, progress)
+        image_generator = generate_image(prompt_mash, negative_prompt, steps, seed, cfg_scale, width, height, lora_scale, progress)
 
         final_image = None
         step_counter = 0
@@ -816,7 +864,7 @@ with gr.Blocks(css_paths="style_20250314.css", title=title, theme='Surn/beeuty',
                 label="Prompt",
                 visible=False,
                 elem_classes="solid",
-                value="
+                value="Planetary overhead view, directly from above, centered on the planet’s surface, (rectangular tabletop_map) alien planet map, Battletech_boardgame scifi world with forests, lakes, oceans, continents and snow at the top and bottom, (middle is dark, no_reflections, no_shadows), looking straight down.",
                 lines=4
             )
             negative_prompt_textbox = gr.Textbox(
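For reference, the per-step blend this commit adds is ordinary classifier-free guidance: the negative-prompt prediction is the baseline, and the positive-prompt prediction is amplified relative to it by guidance_scale. A minimal, self-contained sketch with toy tensors (values and shapes are illustrative only, not the pipeline's real latents):

import torch

# Stand-ins for the two per-step transformer outputs in the diff
# (noise_pred_pos / noise_pred_neg); the numbers are arbitrary.
noise_pred_pos = torch.tensor([0.8, -0.2, 0.5])
noise_pred_neg = torch.tensor([0.3, 0.1, 0.4])
guidance_scale = 3.5

# Same combination rule the denoising loop above applies each step:
# start from the negative prediction and push toward the positive one.
noise_pred = noise_pred_neg + guidance_scale * (noise_pred_pos - noise_pred_neg)
print(noise_pred)  # -> tensor([2.0500, -0.9500, 0.7500])

Note that with this change every denoising step runs the transformer twice (once per prompt), roughly doubling per-step cost, and guidance_scale is used both for the embedded guidance tensor and for the explicit mix shown here.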