Spaces:
Running
Running
Update landmarkdiff/fid.py to v0.3.2
Browse files- landmarkdiff/fid.py +81 -77
landmarkdiff/fid.py
CHANGED
|
@@ -24,10 +24,10 @@ try:
|
|
| 24 |
import torch
|
| 25 |
import torch.nn as nn
|
| 26 |
from torch.utils.data import DataLoader, Dataset
|
| 27 |
-
|
| 28 |
HAS_TORCH = True
|
| 29 |
except ImportError:
|
| 30 |
HAS_TORCH = False
|
|
|
|
| 31 |
|
| 32 |
|
| 33 |
def _load_inception_v3() -> Any:
|
|
@@ -42,95 +42,99 @@ def _load_inception_v3() -> Any:
|
|
| 42 |
return model
|
| 43 |
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
"""
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
img = cv2.resize(img, (self.image_size, self.image_size))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
img = cv2.resize(img, (self.image_size, self.image_size))
|
| 93 |
-
if img.shape[2] == 3:
|
| 94 |
-
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
| 95 |
-
t = torch.from_numpy(img.astype(np.float32) / 255.0).permute(2, 0, 1)
|
| 96 |
-
t = _imagenet_normalize(t)
|
| 97 |
-
return t
|
| 98 |
-
|
| 99 |
-
def _imagenet_normalize(t: Any) -> Any:
|
| 100 |
-
"""Apply ImageNet normalization."""
|
| 101 |
-
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
|
| 102 |
-
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
|
| 103 |
-
return (t - mean) / std
|
| 104 |
-
|
| 105 |
-
@torch.no_grad()
|
| 106 |
-
def _extract_features(
|
| 107 |
-
model: Any,
|
| 108 |
-
dataloader: Any,
|
| 109 |
-
device: Any,
|
| 110 |
-
) -> np.ndarray:
|
| 111 |
-
"""Extract InceptionV3 pool3 features from a dataloader."""
|
| 112 |
-
features = []
|
| 113 |
for batch in dataloader:
|
| 114 |
batch = batch.to(device)
|
| 115 |
feat = model(batch)
|
| 116 |
if isinstance(feat, tuple):
|
| 117 |
feat = feat[0]
|
| 118 |
features.append(feat.cpu().numpy())
|
| 119 |
-
|
| 120 |
|
| 121 |
|
| 122 |
def _compute_statistics(features: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
|
| 123 |
"""Compute mean and covariance of feature vectors."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
mu = np.mean(features, axis=0)
|
| 125 |
sigma = np.cov(features, rowvar=False)
|
| 126 |
return mu, sigma
|
| 127 |
|
| 128 |
|
| 129 |
def _calculate_fid(
|
| 130 |
-
mu1: np.ndarray,
|
| 131 |
-
|
| 132 |
-
mu2: np.ndarray,
|
| 133 |
-
sigma2: np.ndarray,
|
| 134 |
) -> float:
|
| 135 |
"""Calculate FID given two sets of statistics.
|
| 136 |
|
|
@@ -146,7 +150,7 @@ def _calculate_fid(
|
|
| 146 |
covmean = covmean.real
|
| 147 |
|
| 148 |
fid = diff @ diff + np.trace(sigma1 + sigma2 - 2 * covmean)
|
| 149 |
-
return float(fid)
|
| 150 |
|
| 151 |
|
| 152 |
def compute_fid_from_dirs(
|
|
@@ -183,10 +187,10 @@ def compute_fid_from_dirs(
|
|
| 183 |
if len(real_ds) == 0 or len(gen_ds) == 0:
|
| 184 |
raise ValueError("Need at least 1 image in each directory")
|
| 185 |
|
| 186 |
-
real_loader = DataLoader(
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
|
| 191 |
real_features = _extract_features(model, real_loader, dev)
|
| 192 |
gen_features = _extract_features(model, gen_loader, dev)
|
|
|
|
| 24 |
import torch
|
| 25 |
import torch.nn as nn
|
| 26 |
from torch.utils.data import DataLoader, Dataset
|
|
|
|
| 27 |
HAS_TORCH = True
|
| 28 |
except ImportError:
|
| 29 |
HAS_TORCH = False
|
| 30 |
+
Dataset = object # type: ignore[misc,assignment]
|
| 31 |
|
| 32 |
|
| 33 |
def _load_inception_v3() -> Any:
|
|
|
|
| 42 |
return model
|
| 43 |
|
| 44 |
|
| 45 |
+
class ImageFolderDataset(Dataset):
    """Dataset yielding an ImageNet-normalized CHW tensor per image file.

    Scans *directory* once at construction time for files with a known
    image extension; images are decoded lazily in ``__getitem__``.
    """

    def __init__(self, directory: str | Path, image_size: int = 299):
        self.directory = Path(directory)
        allowed = {".jpg", ".jpeg", ".png", ".webp", ".bmp"}
        candidates = [
            p for p in self.directory.iterdir()
            if p.suffix.lower() in allowed and p.is_file()
        ]
        # Sort for a deterministic ordering across runs.
        self.files = sorted(candidates)
        self.image_size = image_size

    def __len__(self) -> int:
        return len(self.files)

    def __getitem__(self, idx: int) -> Any:
        import cv2

        raw = cv2.imread(str(self.files[idx]))
        if raw is None:
            # Unreadable file: fall back to an all-zero tensor instead of raising.
            return torch.zeros(3, self.image_size, self.image_size)
        resized = cv2.resize(raw, (self.image_size, self.image_size))
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        # Scale to [0, 1], move channels first, then apply ImageNet statistics.
        tensor = torch.from_numpy(rgb.astype(np.float32) / 255.0).permute(2, 0, 1)
        return _imagenet_normalize(tensor)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class NumpyArrayDataset(Dataset):
    """Dataset over in-memory numpy images, normalized for InceptionV3.

    Accepts grayscale (H, W), BGR (H, W, 3) or BGRA (H, W, 4) arrays and
    converts each to an ImageNet-normalized (3, S, S) float tensor.
    """

    def __init__(self, images: list[np.ndarray], image_size: int = 299):
        self.images = images
        self.image_size = image_size

    def __len__(self) -> int:
        return len(self.images)

    def __getitem__(self, idx: int) -> Any:
        import cv2

        frame = self.images[idx]
        target = (self.image_size, self.image_size)
        if frame.shape[:2] != target:
            frame = cv2.resize(frame, target)
        # Coerce whatever channel layout we got into 3-channel RGB.
        if frame.ndim == 2:
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        elif frame.shape[2] == 4:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2RGB)
        elif frame.shape[2] == 3:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        chw = torch.from_numpy(frame.astype(np.float32) / 255.0).permute(2, 0, 1)
        return _imagenet_normalize(chw)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _imagenet_normalize(t: torch.Tensor) -> torch.Tensor:
|
| 101 |
+
"""Apply ImageNet normalization."""
|
| 102 |
+
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
|
| 103 |
+
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
|
| 104 |
+
return (t - mean) / std
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def _extract_features(
|
| 108 |
+
model: nn.Module,
|
| 109 |
+
dataloader: DataLoader,
|
| 110 |
+
device: torch.device,
|
| 111 |
+
) -> np.ndarray:
|
| 112 |
+
"""Extract InceptionV3 pool3 features from a dataloader."""
|
| 113 |
+
features = []
|
| 114 |
+
with torch.no_grad():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
for batch in dataloader:
|
| 116 |
batch = batch.to(device)
|
| 117 |
feat = model(batch)
|
| 118 |
if isinstance(feat, tuple):
|
| 119 |
feat = feat[0]
|
| 120 |
features.append(feat.cpu().numpy())
|
| 121 |
+
return np.concatenate(features, axis=0)
|
| 122 |
|
| 123 |
|
| 124 |
def _compute_statistics(features: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
|
| 125 |
"""Compute mean and covariance of feature vectors."""
|
| 126 |
+
if features.shape[0] < 2:
|
| 127 |
+
raise ValueError(
|
| 128 |
+
f"FID requires at least 2 images, got {features.shape[0]}"
|
| 129 |
+
)
|
| 130 |
mu = np.mean(features, axis=0)
|
| 131 |
sigma = np.cov(features, rowvar=False)
|
| 132 |
return mu, sigma
|
| 133 |
|
| 134 |
|
| 135 |
def _calculate_fid(
|
| 136 |
+
mu1: np.ndarray, sigma1: np.ndarray,
|
| 137 |
+
mu2: np.ndarray, sigma2: np.ndarray,
|
|
|
|
|
|
|
| 138 |
) -> float:
|
| 139 |
"""Calculate FID given two sets of statistics.
|
| 140 |
|
|
|
|
| 150 |
covmean = covmean.real
|
| 151 |
|
| 152 |
fid = diff @ diff + np.trace(sigma1 + sigma2 - 2 * covmean)
|
| 153 |
+
return float(max(fid, 0.0))
|
| 154 |
|
| 155 |
|
| 156 |
def compute_fid_from_dirs(
|
|
|
|
| 187 |
if len(real_ds) == 0 or len(gen_ds) == 0:
|
| 188 |
raise ValueError("Need at least 1 image in each directory")
|
| 189 |
|
| 190 |
+
real_loader = DataLoader(real_ds, batch_size=batch_size,
|
| 191 |
+
num_workers=num_workers, pin_memory=True)
|
| 192 |
+
gen_loader = DataLoader(gen_ds, batch_size=batch_size,
|
| 193 |
+
num_workers=num_workers, pin_memory=True)
|
| 194 |
|
| 195 |
real_features = _extract_features(model, real_loader, dev)
|
| 196 |
gen_features = _extract_features(model, gen_loader, dev)
|