Revert "Upload folder using huggingface_hub"

Browse files

This reverts commit ec377361f0dcaf8b153e6940aa88d84bd7ab7515.

Files changed (9)

fcdm_diffae/__init__.py +1 -1
fcdm_diffae/__pycache__/__init__.cpython-312.pyc +0 -0
fcdm_diffae/__pycache__/config.cpython-312.pyc +0 -0
fcdm_diffae/__pycache__/decoder.cpython-312.pyc +0 -0
fcdm_diffae/__pycache__/encoder.cpython-312.pyc +0 -0
fcdm_diffae/__pycache__/model.cpython-312.pyc +0 -0
fcdm_diffae/__pycache__/samplers.cpython-312.pyc +0 -0
fcdm_diffae/config.py +0 -18
fcdm_diffae/model.py +11 -32

fcdm_diffae/__init__.py CHANGED Viewed

@@ -26,8 +26,8 @@ from .encoder import EncoderPosterior
 from .model import FCDMDiffAE
 __all__ = [
-    "EncoderPosterior",
     "FCDMDiffAE",
     "FCDMDiffAEConfig",
     "FCDMDiffAEInferenceConfig",
 ]

 from .model import FCDMDiffAE
 __all__ = [
     "FCDMDiffAE",
     "FCDMDiffAEConfig",
     "FCDMDiffAEInferenceConfig",
+    "EncoderPosterior",
 ]

fcdm_diffae/__pycache__/__init__.cpython-312.pyc CHANGED Viewed

Binary files a/fcdm_diffae/__pycache__/__init__.cpython-312.pyc and b/fcdm_diffae/__pycache__/__init__.cpython-312.pyc differ

fcdm_diffae/__pycache__/config.cpython-312.pyc CHANGED Viewed

Binary files a/fcdm_diffae/__pycache__/config.cpython-312.pyc and b/fcdm_diffae/__pycache__/config.cpython-312.pyc differ

fcdm_diffae/__pycache__/decoder.cpython-312.pyc CHANGED Viewed

Binary files a/fcdm_diffae/__pycache__/decoder.cpython-312.pyc and b/fcdm_diffae/__pycache__/decoder.cpython-312.pyc differ

fcdm_diffae/__pycache__/encoder.cpython-312.pyc CHANGED Viewed

Binary files a/fcdm_diffae/__pycache__/encoder.cpython-312.pyc and b/fcdm_diffae/__pycache__/encoder.cpython-312.pyc differ

fcdm_diffae/__pycache__/model.cpython-312.pyc CHANGED Viewed

Binary files a/fcdm_diffae/__pycache__/model.cpython-312.pyc and b/fcdm_diffae/__pycache__/model.cpython-312.pyc differ

fcdm_diffae/__pycache__/samplers.cpython-312.pyc CHANGED Viewed

Binary files a/fcdm_diffae/__pycache__/samplers.cpython-312.pyc and b/fcdm_diffae/__pycache__/samplers.cpython-312.pyc differ

fcdm_diffae/config.py CHANGED Viewed

@@ -26,30 +26,12 @@ class FCDMDiffAEConfig:
     bottleneck_posterior_kind: str = "diagonal_gaussian"
     # Post-bottleneck normalization: "channel_wise" or "disabled"
     bottleneck_norm_mode: str = "disabled"
-    # Bottleneck patchification: "off" or "patch_2x2"
-    # When "patch_2x2", encoder latents are 2x2 patchified after the bottleneck
-    # (channels * 4, spatial / 2), and decode unpatchifies before the decoder.
-    bottleneck_patchify_mode: str = "off"
     # VP diffusion schedule endpoints
     logsnr_min: float = -10.0
     logsnr_max: float = 10.0
     # Pixel-space noise std for VP diffusion initialization
     pixel_noise_std: float = 0.558
-    @property
-    def latent_channels(self) -> int:
-        """Channel width of the exported latent space."""
-        if self.bottleneck_patchify_mode == "patch_2x2":
-            return self.bottleneck_dim * 4
-        return self.bottleneck_dim
-    @property
-    def effective_patch_size(self) -> int:
-        """Effective spatial stride from image to latent grid."""
-        if self.bottleneck_patchify_mode == "patch_2x2":
-            return self.patch_size * 2
-        return self.patch_size
     def save(self, path: str | Path) -> None:
         """Save config as JSON."""
         p = Path(path)

     bottleneck_posterior_kind: str = "diagonal_gaussian"
     # Post-bottleneck normalization: "channel_wise" or "disabled"
     bottleneck_norm_mode: str = "disabled"
     # VP diffusion schedule endpoints
     logsnr_min: float = -10.0
     logsnr_max: float = 10.0
     # Pixel-space noise std for VP diffusion initialization
     pixel_noise_std: float = 0.558
     def save(self, path: str | Path) -> None:
         """Save config as JSON."""
         p = Path(path)

fcdm_diffae/model.py CHANGED Viewed

@@ -71,14 +71,14 @@ class FCDMDiffAE(nn.Module):
         super().__init__()
         self.config = config
-        # Latent running stats for whitening/dewhitening (at exported latent channels)
         self.register_buffer(
             "latent_norm_running_mean",
-            torch.zeros((config.latent_channels,), dtype=torch.float32),
         )
         self.register_buffer(
             "latent_norm_running_var",
-            torch.ones((config.latent_channels,), dtype=torch.float32),
         )
         self.encoder = Encoder(
@@ -205,20 +205,6 @@ class FCDMDiffAE(nn.Module):
         mean, std = self._latent_norm_stats()
         return z * std.to(device=z.device) + mean.to(device=z.device)
-    def _patchify(self, z: Tensor) -> Tensor:
-        """2x2 patchify: [B, C, H, W] -> [B, 4C, H/2, W/2]."""
-        b, c, h, w = z.shape
-        z = z.reshape(b, c, h // 2, 2, w // 2, 2)
-        z = z.permute(0, 1, 3, 5, 2, 4)
-        return z.reshape(b, c * 4, h // 2, w // 2)
-    def _unpatchify(self, z: Tensor) -> Tensor:
-        """2x2 unpatchify: [B, 4C, H/2, W/2] -> [B, C, H, W]."""
-        b, c, h, w = z.shape
-        z = z.reshape(b, c // 4, 2, 2, h, w)
-        z = z.permute(0, 1, 4, 2, 5, 3)
-        return z.reshape(b, c // 4, h * 2, w * 2)
     def encode(self, images: Tensor) -> Tensor:
         """Encode images to whitened latents (posterior mode).
@@ -226,19 +212,16 @@ class FCDMDiffAE(nn.Module):
         use by downstream latent-space diffusion models.
         Args:
-            images: [B, 3, H, W] in [-1, 1], H and W divisible by
-                effective_patch_size.
         Returns:
-            Whitened latents [B, latent_channels, H/effective_patch, W/effective_patch].
         """
         try:
             model_dtype = next(self.parameters()).dtype
         except StopIteration:
             model_dtype = torch.float32
         z = self.encoder(images.to(dtype=model_dtype))
-        if self.config.bottleneck_patchify_mode == "patch_2x2":
-            z = self._patchify(z)
         return self.whiten(z).to(dtype=model_dtype)
     def encode_posterior(self, images: Tensor) -> EncoderPosterior:
@@ -267,13 +250,12 @@ class FCDMDiffAE(nn.Module):
     ) -> Tensor:
         """Decode whitened latents to images via VP diffusion.
-        Latents are dewhitened and (if applicable) unpatchified internally
-        before being passed to the decoder.
         Args:
-            latents: [B, latent_channels, h, w] whitened encoder latents.
-            height: Output image height (divisible by effective_patch_size).
-            width: Output image width (divisible by effective_patch_size).
             inference_config: Optional inference parameters.
         Returns:
@@ -289,11 +271,8 @@ class FCDMDiffAE(nn.Module):
         except StopIteration:
             model_dtype = torch.float32
-        # Dewhiten and unpatchify back to raw encoder scale for the decoder
-        latents = self.dewhiten(latents)
-        if config.bottleneck_patchify_mode == "patch_2x2":
-            latents = self._unpatchify(latents)
-        latents = latents.to(dtype=model_dtype)
         if height % config.patch_size != 0 or width % config.patch_size != 0:
             raise ValueError(

         super().__init__()
         self.config = config
+        # Latent running stats for whitening/dewhitening
         self.register_buffer(
             "latent_norm_running_mean",
+            torch.zeros((config.bottleneck_dim,), dtype=torch.float32),
         )
         self.register_buffer(
             "latent_norm_running_var",
+            torch.ones((config.bottleneck_dim,), dtype=torch.float32),
         )
         self.encoder = Encoder(
         mean, std = self._latent_norm_stats()
         return z * std.to(device=z.device) + mean.to(device=z.device)
     def encode(self, images: Tensor) -> Tensor:
         """Encode images to whitened latents (posterior mode).
         use by downstream latent-space diffusion models.
         Args:
+            images: [B, 3, H, W] in [-1, 1], H and W divisible by patch_size.
         Returns:
+            Whitened latents [B, bottleneck_dim, H/patch, W/patch].
         """
         try:
             model_dtype = next(self.parameters()).dtype
         except StopIteration:
             model_dtype = torch.float32
         z = self.encoder(images.to(dtype=model_dtype))
         return self.whiten(z).to(dtype=model_dtype)
     def encode_posterior(self, images: Tensor) -> EncoderPosterior:
     ) -> Tensor:
         """Decode whitened latents to images via VP diffusion.
+        Latents are dewhitened internally before being passed to the decoder.
         Args:
+            latents: [B, bottleneck_dim, h, w] whitened encoder latents.
+            height: Output image height (divisible by patch_size).
+            width: Output image width (divisible by patch_size).
             inference_config: Optional inference parameters.
         Returns:
         except StopIteration:
             model_dtype = torch.float32
+        # Dewhiten back to raw encoder scale for the decoder
+        latents = self.dewhiten(latents).to(dtype=model_dtype)
         if height % config.patch_size != 0 or width % config.patch_size != 0:
             raise ValueError(