Update app.py

app.py CHANGED
@@ -98,41 +98,49 @@ def inpaint(
     if preserve_unmasked:
         np_img = np.array(image).astype(np.float32) / 255.0
         img_t = torch.from_numpy(np_img).permute(2, 0, 1).unsqueeze(0).to(pipe.device)
+        img_t = F.interpolate(img_t, size=(height, width), mode='bilinear', align_corners=False)
         img_t = (img_t * 2 - 1).to(dtype=pipe.vae.dtype)
 
         np_mask = np.array(mask).astype(np.float32) / 255.0
         mask_t = torch.from_numpy(np_mask).unsqueeze(0).unsqueeze(0).to(pipe.device)
+        mask_t = F.interpolate(mask_t, size=(height, width), mode='nearest')
 
         with torch.no_grad():
             latents_orig = pipe.vae.encode(img_t).latent_dist.sample()
-            scaling = getattr(pipe.vae.config, "scaling_factor", getattr(pipe, "vae_scale_factor",
-
-            latents_orig = latents_orig * scaling
+            scaling = getattr(pipe.vae.config, "scaling_factor", getattr(pipe, "vae_scale_factor", 0.13025))
+            latents_orig = latents_orig * scaling
 
         # Resize the mask to the size of the latents
-
-
-        )
+        latent_height = latents_orig.shape[2]
+        latent_width = latents_orig.shape[3]
+        mask_t = F.interpolate(mask_t, size=(latent_height, latent_width), mode="nearest")
 
         def callback_on_step_end(pipe_self, i, t, callback_kwargs):
             latents = callback_kwargs.get("latents", None)
             if latents is not None:
+                # Check that we have 4 dimensions [batch, channels, height, width]
+                if latents.dim() != 4:
+                    print(f"⚠️ Warning: latents has {latents.dim()} dimensions, expected 4")
+                    return callback_kwargs
+
                 # Dynamically adjust the sizes to those of the current tensor
-
-
+                current_height = latents.shape[2]
+                current_width = latents.shape[3]
+
+                if mask_t.shape[-2:] != (current_height, current_width):
+                    resized_mask = F.interpolate(mask_t, size=(current_height, current_width), mode="nearest")
                 else:
                     resized_mask = mask_t
 
-                if latents_orig.shape[-2:] !=
-                    resized_latents_orig =
+                if latents_orig.shape[-2:] != (current_height, current_width):
+                    resized_latents_orig = F.interpolate(latents_orig, size=(current_height, current_width), mode="nearest")
                 else:
                     resized_latents_orig = latents_orig
 
+                # Blend only in the unmasked areas
                 latents = latents * resized_mask + resized_latents_orig * (1 - resized_mask)
                 callback_kwargs["latents"] = latents
 
-                # 🔍 Debug only
-                print(f"[Callback] step={i}, keys={list(callback_kwargs.keys())}, latents={latents.shape if latents is not None else None}")
             return callback_kwargs
 
         callback_on_step_end_tensor_inputs = ["latents"]
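The callback and the callback_on_step_end_tensor_inputs list defined at the end of the hunk are presumably forwarded to the diffusers pipeline call, which lies outside this hunk. Below is a minimal sketch of that wiring, assuming it runs inside inpaint() where pipe, image, mask, height, and width already exist; the prompt, num_inference_steps, and guidance_scale arguments are illustrative assumptions, not values taken from app.py.

# Hypothetical call site (not shown in the diff). Because "latents" is listed
# in callback_on_step_end_tensor_inputs, diffusers passes the current latents
# into callback_on_step_end after every denoising step and replaces them with
# whatever the returned callback_kwargs contain. The callback would only be
# passed when preserve_unmasked is set, since it is defined inside that branch.
result = pipe(
    prompt=prompt,                             # assumed argument of inpaint()
    image=image,
    mask_image=mask,
    height=height,
    width=width,
    num_inference_steps=num_inference_steps,   # assumed argument of inpaint()
    guidance_scale=guidance_scale,             # assumed argument of inpaint()
    callback_on_step_end=callback_on_step_end,
    callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
)
output_image = result.images[0]

With this in place, every step re-imposes latents * resized_mask + resized_latents_orig * (1 - resized_mask): where the mask is 0 the latents are reset to the VAE encoding of the original image, and only the white (masked) region is actually regenerated. The 0.13025 fallback for the VAE scaling factor matches the SDXL VAE, which suggests the Space targets an SDXL inpainting checkpoint, though the model id is not visible in this hunk.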