HoomKh committed · verified
Commit e5461d8 · 1 Parent(s): ca4180d
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ # -----------------------------------------------------------------------------
+ # A sample Dockerfile to help you replicate our test environment
+ # -----------------------------------------------------------------------------
+
+ FROM pytorch/pytorch:2.4.1-cuda12.4-cudnn9-runtime
+ WORKDIR /app
+ COPY . .
+
+ # Install your Python and apt requirements
+ RUN pip install -r requirements.txt
+ RUN apt-get update && apt-get install -y $(cat apt_requirements.txt)
+ RUN chmod +x run.sh
+
+ CMD ["python3", "runner.py"]
datasets/all_classes_dataset.py ADDED
@@ -0,0 +1,158 @@
+ # datasets/all_classes_dataset.py
+
+ import os
+ from enum import Enum
+
+ import PIL
+ import torch
+ from torch.utils.data import Dataset
+ from torchvision import transforms
+
+ IMAGENET_MEAN = [0.485, 0.456, 0.406]
+ IMAGENET_STD = [0.229, 0.224, 0.225]
+
+
+ class DatasetSplit(Enum):
+     TRAIN = "train"
+     VAL = "val"
+     TEST = "test"
+
+
+ class AllClassesDataset(Dataset):
+     def __init__(
+         self,
+         source,
+         input_size=518,
+         output_size=224,
+         split=DatasetSplit.TEST,
+         external_transform=None,
+         **kwargs,
+     ):
+         """
+         Initialize the dataset to include all classes.
+
+         Args:
+             source (str): Path to the root data directory.
+             input_size (int): Input image size for transformations.
+             output_size (int): Output mask size.
+             split (DatasetSplit): Dataset split to use (TRAIN, VAL, TEST).
+             external_transform (callable, optional): External image transformations.
+             **kwargs: Additional keyword arguments.
+         """
+         super().__init__()
+         self.source = source
+         self.split = split
+         self.input_size = input_size
+         self.classnames_to_use = self.get_all_class_names()
+
+         self.imgpaths_per_class, self.data_to_iterate = self.get_image_data()
+
+         if external_transform is None:
+             self.transform_img = transforms.Compose([
+                 transforms.Resize((input_size, input_size)),
+                 # transforms.CenterCrop(input_size),
+                 transforms.ToTensor(),
+                 transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
+             ])
+         else:
+             self.transform_img = external_transform
+
+         self.transform_mask = transforms.Compose([
+             transforms.Resize((output_size, output_size)),
+             # transforms.CenterCrop(output_size),
+             transforms.ToTensor(),
+         ])
+         self.output_shape = (1, output_size, output_size)
+
+     def get_all_class_names(self):
+         """
+         Retrieve all class names (subdirectories) from the source directory.
+
+         Returns:
+             list: List of class names.
+         """
+         all_items = os.listdir(self.source)
+         classnames = [
+             item for item in all_items
+             if os.path.isdir(os.path.join(self.source, item))
+         ]
+         return classnames
+
+     def get_image_data(self):
+         """
+         Collect image paths and corresponding mask paths for all classes.
+
+         Returns:
+             tuple: (imgpaths_per_class, data_to_iterate)
+         """
+         imgpaths_per_class = {}
+         maskpaths_per_class = {}
+
+         for classname in self.classnames_to_use:
+             classpath = os.path.join(self.source, classname, self.split.value)
+             maskpath = os.path.join(self.source, classname, "ground_truth")
+             anomaly_types = os.listdir(classpath)
+
+             imgpaths_per_class[classname] = {}
+             maskpaths_per_class[classname] = {}
+
+             for anomaly in anomaly_types:
+                 anomaly_path = os.path.join(classpath, anomaly)
+                 anomaly_files = sorted(os.listdir(anomaly_path))
+                 imgpaths_per_class[classname][anomaly] = [
+                     os.path.join(anomaly_path, x) for x in anomaly_files
+                 ]
+
+                 if self.split == DatasetSplit.TEST and anomaly != "good":
+                     anomaly_mask_path = os.path.join(maskpath, anomaly)
+                     if os.path.exists(anomaly_mask_path):
+                         anomaly_mask_files = sorted(os.listdir(anomaly_mask_path))
+                         maskpaths_per_class[classname][anomaly] = [
+                             os.path.join(anomaly_mask_path, x) for x in anomaly_mask_files
+                         ]
+                     else:
+                         # If the mask path does not exist, use None placeholders
+                         maskpaths_per_class[classname][anomaly] = [None] * len(anomaly_files)
+                 else:
+                     maskpaths_per_class[classname][anomaly] = [None] * len(anomaly_files)
+
+         data_to_iterate = []
+         for classname in sorted(imgpaths_per_class.keys()):
+             for anomaly in sorted(imgpaths_per_class[classname].keys()):
+                 for i, image_path in enumerate(imgpaths_per_class[classname][anomaly]):
+                     data_tuple = [classname, anomaly, image_path]
+                     if self.split == DatasetSplit.TEST and anomaly != "good":
+                         mask_path = maskpaths_per_class[classname][anomaly][i]
+                         data_tuple.append(mask_path)
+                     else:
+                         data_tuple.append(None)
+                     data_to_iterate.append(data_tuple)
+
+         return imgpaths_per_class, data_to_iterate
+
+     def __getitem__(self, idx):
+         classname, anomaly, image_path, mask_path = self.data_to_iterate[idx]
+         try:
+             image = PIL.Image.open(image_path).convert("RGB")
+         except Exception:
+             # Fall back to a black image if the file is unreadable
+             image = PIL.Image.new("RGB", (self.input_size, self.input_size), (0, 0, 0))
+         image = self.transform_img(image)
+
+         if self.split == DatasetSplit.TEST and mask_path is not None:
+             try:
+                 mask = PIL.Image.open(mask_path).convert("L")
+                 mask = self.transform_mask(mask) > 0
+             except Exception:
+                 mask = torch.zeros(self.output_shape)
+         else:
+             mask = torch.zeros(self.output_shape)
+
+         return {
+             "image": image,  # Tensor: [3, H, W]
+             "mask": mask,  # Tensor: [1, output_size, output_size]
+             "is_anomaly": int(anomaly != "good"),
+             "image_path": image_path,
+         }
+
+     def __len__(self):
+         return len(self.data_to_iterate)
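A minimal usage sketch for AllClassesDataset (not part of the commit), assuming data under ./data in the {class}/{split}/{anomaly}/*.png layout that get_image_data walks:

from torch.utils.data import DataLoader

from datasets.all_classes_dataset import AllClassesDataset, DatasetSplit

dataset = AllClassesDataset(source="./data", split=DatasetSplit.TEST)
loader = DataLoader(dataset, batch_size=1, shuffle=False)

batch = next(iter(loader))
print(batch["image"].shape)  # torch.Size([1, 3, 518, 518])
print(batch["mask"].shape)   # torch.Size([1, 1, 224, 224])
print(batch["is_anomaly"])   # tensor([0]) for "good", tensor([1]) otherwise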
datasets/mvec.py ADDED
@@ -0,0 +1,301 @@
+ import glob
+ import logging
+ import os
+ from enum import Enum
+
+ import numpy as np
+ import pandas as pd
+ import PIL
+ import torch
+ from torchvision import transforms
+
+ from .perlin import perlin_mask
+
+ LOGGER = logging.getLogger(__name__)
+
+ _CLASSNAMES = [
+     "carpet",
+     "grid",
+     "leather",
+     "tile",
+     "wood",
+     "bottle",
+     "cable",
+     "capsule",
+     "hazelnut",
+     "metal_nut",
+     "pill",
+     "screw",
+     "toothbrush",
+     "transistor",
+     "zipper",
+ ]
+
+ IMAGENET_MEAN = [0.485, 0.456, 0.406]
+ IMAGENET_STD = [0.229, 0.224, 0.225]
+
+
+ class DatasetSplit(Enum):
+     TRAIN = "train"
+     TEST = "test"
+
+
+ class MVTecDataset(torch.utils.data.Dataset):
+     """
+     PyTorch Dataset for MVTec.
+     """
+
+     def __init__(
+         self,
+         source,
+         anomaly_source_path='/root/dataset/dtd/images',
+         dataset_name='mvtec',
+         classname='leather',
+         resize=288,
+         imagesize=288,
+         split=DatasetSplit.TRAIN,
+         rotate_degrees=0,
+         translate=0,
+         brightness_factor=0,
+         contrast_factor=0,
+         saturation_factor=0,
+         gray_p=0,
+         h_flip_p=0,
+         v_flip_p=0,
+         distribution=0,
+         mean=0.5,
+         std=0.1,
+         fg=0,
+         rand_aug=1,
+         scale=0,
+         batch_size=8,
+         **kwargs,
+     ):
+         """
+         Args:
+             source: [str]. Path to the MVTec data folder.
+             classname: [str or None]. Name of the MVTec class that should be
+                        provided by this dataset. If None, the dataset
+                        iterates over all available images.
+             resize: [int]. (Square) Size the loaded image initially gets
+                     resized to.
+             imagesize: [int]. (Square) Size the resized loaded image gets
+                        (center-)cropped to.
+             split: [enum-option]. Indicates if the training or test split of
+                    the data should be used. Has to be an option taken from
+                    DatasetSplit, e.g. mvtec.DatasetSplit.TRAIN. Note that
+                    mvtec.DatasetSplit.TEST will also load mask data.
+         """
+         super().__init__()
+         self.source = source
+         self.split = split
+         self.batch_size = batch_size
+         self.distribution = distribution
+         self.mean = mean
+         self.std = std
+         self.fg = fg
+         self.rand_aug = rand_aug
+         self.resize = resize if self.distribution != 1 else [resize, resize]
+         self.imgsize = imagesize
+         self.imagesize = (3, self.imgsize, self.imgsize)
+         self.classname = classname
+         self.dataset_name = dataset_name
+
+         if self.distribution != 1 and self.classname in ('toothbrush', 'wood'):
+             self.resize = round(self.imgsize * 329 / 288)
+
+         xlsx_path = './datasets/excel/' + self.dataset_name + '_distribution.xlsx'
+         if self.fg == 2:  # choose by file
+             try:
+                 df = pd.read_excel(xlsx_path)
+                 self.class_fg = df.loc[df['Class'] == self.dataset_name + '_' + classname, 'Foreground'].values[0]
+             except Exception:
+                 self.class_fg = 1
+         elif self.fg == 1:  # with foreground mask
+             self.class_fg = 1
+         else:  # without foreground mask
+             self.class_fg = 0
+
+         self.imgpaths_per_class, self.data_to_iterate = self.get_image_data()
+         # DTD images sit four directory levels below the anomaly source root
+         self.anomaly_source_paths = sorted(glob.glob(anomaly_source_path + "/*/*/*/*.png"))
+         LOGGER.debug("Found %d anomaly source images", len(self.anomaly_source_paths))
+
+         self.transform_img = [
+             transforms.Resize(self.resize),
+             transforms.ColorJitter(brightness_factor, contrast_factor, saturation_factor),
+             transforms.RandomHorizontalFlip(h_flip_p),
+             transforms.RandomVerticalFlip(v_flip_p),
+             transforms.RandomGrayscale(gray_p),
+             transforms.RandomAffine(rotate_degrees,
+                                     translate=(translate, translate),
+                                     scale=(1.0 - scale, 1.0 + scale),
+                                     interpolation=transforms.InterpolationMode.BILINEAR),
+             transforms.CenterCrop(self.imgsize),
+             transforms.ToTensor(),
+             transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
+         ]
+         self.transform_img = transforms.Compose(self.transform_img)
+
+         self.transform_mask = [
+             transforms.Resize(self.resize),
+             transforms.CenterCrop(self.imgsize),
+             transforms.ToTensor(),
+         ]
+         self.transform_mask = transforms.Compose(self.transform_mask)
+
+     def rand_augmenter(self):
+         """Compose a transform with three randomly chosen augmentations."""
+         list_aug = [
+             transforms.ColorJitter(contrast=(0.8, 1.2)),
+             transforms.ColorJitter(brightness=(0.8, 1.2)),
+             transforms.ColorJitter(saturation=(0.8, 1.2), hue=(-0.2, 0.2)),
+             transforms.RandomHorizontalFlip(p=1),
+             transforms.RandomVerticalFlip(p=1),
+             transforms.RandomGrayscale(p=1),
+             transforms.RandomAutocontrast(p=1),
+             transforms.RandomEqualize(p=1),
+             transforms.RandomAffine(degrees=(-45, 45)),
+         ]
+         aug_idx = np.random.choice(np.arange(len(list_aug)), 3, replace=False)
+
+         transform_aug = [
+             transforms.Resize(self.resize),
+             list_aug[aug_idx[0]],
+             list_aug[aug_idx[1]],
+             list_aug[aug_idx[2]],
+             transforms.CenterCrop(self.imgsize),
+             transforms.ToTensor(),
+             transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
+         ]
+
+         transform_aug = transforms.Compose(transform_aug)
+         return transform_aug
+
+     def __getitem__(self, idx):
+         try:
+             classname, anomaly, image_path, mask_path = self.data_to_iterate[idx]
+
+             # Load the main image
+             if not os.path.exists(image_path):
+                 LOGGER.warning(f"Image not found: {image_path}. Skipping index {idx}.")
+                 return None
+
+             image = PIL.Image.open(image_path).convert("RGB")
+             image = self.transform_img(image)
+
+             # Initialize default tensors
+             mask_fg = mask_s = aug_image = torch.tensor([1])
+
+             if self.split == DatasetSplit.TRAIN:
+                 try:
+                     aug = PIL.Image.open(np.random.choice(self.anomaly_source_paths)).convert("RGB")
+                     if self.rand_aug:
+                         transform_aug = self.rand_augmenter()
+                         aug = transform_aug(aug)
+                     else:
+                         aug = self.transform_img(aug)
+                 except (IndexError, ValueError):
+                     # np.random.choice raises ValueError on an empty path list
+                     LOGGER.warning(f"No anomaly source images available. Using original image as augmentation for index {idx}.")
+                     aug = image
+
+                 # Handle foreground mask
+                 if self.class_fg:
+                     fgmask_path = (
+                         image_path.split(classname)[0]
+                         + classname
+                         + "/ground_truth/"
+                         + os.path.split(image_path)[-1].replace(".png", "_mask.png")
+                     )
+                     if os.path.exists(fgmask_path):
+                         mask_fg = PIL.Image.open(fgmask_path)
+                         mask_fg = torch.ceil(self.transform_mask(mask_fg)[0])
+                     else:
+                         LOGGER.warning(f"Foreground mask not found: {fgmask_path}. Skipping mask for index {idx}.")
+                         mask_fg = torch.zeros_like(image[0])  # Default empty mask
+
+                 # Generate masks and augmented images
+                 mask_all = perlin_mask(image.shape, self.imgsize // 8, 0, 6, mask_fg, 1)
+                 mask_s = torch.from_numpy(mask_all[0])
+                 mask_l = torch.from_numpy(mask_all[1])
+
+                 beta = np.random.normal(loc=self.mean, scale=self.std)
+                 beta = np.clip(beta, 0.2, 0.8)
+                 aug_image = image * (1 - mask_l) + (1 - beta) * aug * mask_l + beta * image * mask_l
+
+             if self.split == DatasetSplit.TEST and mask_path is not None:
+                 if os.path.exists(mask_path):
+                     mask_gt = PIL.Image.open(mask_path).convert("L")
+                     mask_gt = self.transform_mask(mask_gt)
+                 else:
+                     LOGGER.warning(f"Ground truth mask not found: {mask_path}. Using default empty mask for index {idx}.")
+                     mask_gt = torch.zeros([1, *image.size()[1:]])
+             else:
+                 mask_gt = torch.zeros([1, *image.size()[1:]])
+
+             return {
+                 "image": image,
+                 "aug": aug_image,
+                 "mask_s": mask_s,
+                 "mask_gt": mask_gt,
+                 "is_anomaly": int(anomaly != "good"),
+                 "image_path": image_path,
+             }
+
+         except Exception as e:
+             LOGGER.error(f"Error processing index {idx}: {e}")
+             return None
+
+     def __len__(self):
+         return len(self.data_to_iterate)
+
+     def get_image_data(self):
+         imgpaths_per_class = {}
+         maskpaths_per_class = {}
+
+         classpath = os.path.join(self.source, self.classname, self.split.value)
+         maskpath = os.path.join(self.source, self.classname, "ground_truth")
+         anomaly_types = os.listdir(classpath)
+
+         imgpaths_per_class[self.classname] = {}
+         maskpaths_per_class[self.classname] = {}
+
+         for anomaly in anomaly_types:
+             anomaly_path = os.path.join(classpath, anomaly)
+             anomaly_files = sorted(os.listdir(anomaly_path))
+             imgpaths_per_class[self.classname][anomaly] = [os.path.join(anomaly_path, x) for x in anomaly_files]
+
+             if self.split == DatasetSplit.TEST and anomaly != "good":
+                 anomaly_mask_path = os.path.join(maskpath, anomaly)
+                 if os.path.exists(anomaly_mask_path):
+                     anomaly_mask_files = sorted(os.listdir(anomaly_mask_path))
+                     maskpaths_per_class[self.classname][anomaly] = [os.path.join(anomaly_mask_path, x) for x in anomaly_mask_files]
+                 else:
+                     LOGGER.warning(f"Anomaly mask path does not exist: {anomaly_mask_path}. Skipping masks for {anomaly}.")
+                     maskpaths_per_class[self.classname][anomaly] = []
+             else:
+                 maskpaths_per_class[self.classname]["good"] = None
+
+         data_to_iterate = []
+         for classname in sorted(imgpaths_per_class.keys()):
+             for anomaly in sorted(imgpaths_per_class[classname].keys()):
+                 for i, image_path in enumerate(imgpaths_per_class[classname][anomaly]):
+                     try:
+                         if self.split == DatasetSplit.TEST and anomaly != "good":
+                             if i < len(maskpaths_per_class[classname][anomaly]):
+                                 mask_path = maskpaths_per_class[classname][anomaly][i]
+                             else:
+                                 LOGGER.warning(f"No corresponding mask for {image_path}. Skipping.")
+                                 continue
+                         else:
+                             mask_path = None
+
+                         if os.path.exists(image_path) and (mask_path is None or os.path.exists(mask_path)):
+                             data_to_iterate.append([classname, anomaly, image_path, mask_path])
+                         else:
+                             LOGGER.warning(f"Missing required file for {image_path} or {mask_path}. Skipping.")
+                     except Exception as e:
+                         LOGGER.error(f"Error processing file {image_path}: {e}. Skipping.")
+
+         if len(data_to_iterate) == 0:
+             raise ValueError("No valid data found. Please check dataset paths and files.")
+
+         return imgpaths_per_class, data_to_iterate
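A training-side sketch for MVTecDataset (not part of the commit), assuming MVTec-AD at a hypothetical ./mvtec and the DTD texture set at the default anomaly_source_path; with fg=0 no foreground mask is required:

from torch.utils.data import DataLoader

from datasets.mvec import MVTecDataset, DatasetSplit

train_set = MVTecDataset(
    source="./mvtec",
    classname="leather",
    split=DatasetSplit.TRAIN,
    fg=0,        # skip foreground masking
    rand_aug=1,  # three random ops applied to the DTD texture
)
batch = next(iter(DataLoader(train_set, batch_size=8, shuffle=True)))
# batch["image"]: clean images; batch["aug"]: Perlin-blended pseudo-anomalies;
# batch["mask_s"]: feature-level masks, (imagesize // 8) cells per side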
datasets/perlin.py ADDED
@@ -0,0 +1,73 @@
+ import math
+
+ import imgaug.augmenters as iaa
+ import numpy as np
+ import torch
+
+
+ def generate_thr(img_shape, min_scale=0, max_scale=4):
+     """Sample a binarized Perlin noise map for an image of shape img_shape (C, H, W)."""
+     perlin_scalex = 2 ** np.random.randint(min_scale, max_scale)
+     perlin_scaley = 2 ** np.random.randint(min_scale, max_scale)
+     perlin_noise_np = rand_perlin_2d_np((img_shape[1], img_shape[2]), (perlin_scalex, perlin_scaley))
+     threshold = 0.5
+     perlin_noise_np = iaa.Sequential([iaa.Affine(rotate=(-90, 90))])(image=perlin_noise_np)
+     perlin_thr = np.where(perlin_noise_np > threshold, np.ones_like(perlin_noise_np), np.zeros_like(perlin_noise_np))
+     return perlin_thr
+
+
+ def perlin_mask(img_shape, feat_size, min_scale, max_scale, mask_fg, flag=0):
+     """Sample a non-empty Perlin anomaly mask at feature resolution; if flag != 0,
+     also return the full-resolution mask."""
+     mask = np.zeros((feat_size, feat_size))
+     while np.max(mask) == 0:
+         perlin_thr_1 = generate_thr(img_shape, min_scale, max_scale)
+         perlin_thr_2 = generate_thr(img_shape, min_scale, max_scale)
+         temp = torch.rand(1).numpy()[0]
+         # Randomly combine the two maps: union, intersection, or the first alone
+         if temp > 2 / 3:
+             perlin_thr = perlin_thr_1 + perlin_thr_2
+             perlin_thr = np.where(perlin_thr > 0, np.ones_like(perlin_thr), np.zeros_like(perlin_thr))
+         elif temp > 1 / 3:
+             perlin_thr = perlin_thr_1 * perlin_thr_2
+         else:
+             perlin_thr = perlin_thr_1
+         perlin_thr = torch.from_numpy(perlin_thr)
+         perlin_thr_fg = perlin_thr * mask_fg
+         down_ratio_y = int(img_shape[1] / feat_size)
+         down_ratio_x = int(img_shape[2] / feat_size)
+         # Max-pool the image-level mask down to the feature grid
+         mask = torch.nn.functional.max_pool2d(perlin_thr_fg.unsqueeze(0).unsqueeze(0), (down_ratio_y, down_ratio_x)).float()
+         mask = mask.numpy()[0, 0]
+     mask_s = mask
+     if flag == 0:
+         return mask_s
+     mask_l = perlin_thr_fg.numpy()
+     return mask_s, mask_l
+
+
+ def lerp_np(x, y, w):
+     """Linear interpolation between x and y with weight w."""
+     return (y - x) * w + x
+
+
+ def rand_perlin_2d_np(shape, res, fade=lambda t: 6 * t ** 5 - 15 * t ** 4 + 10 * t ** 3):
+     """Generate a 2D Perlin noise array of the given shape."""
+     delta = (res[0] / shape[0], res[1] / shape[1])
+     d = (shape[0] // res[0], shape[1] // res[1])
+     grid = np.mgrid[0:res[0]:delta[0], 0:res[1]:delta[1]].transpose(1, 2, 0) % 1
+
+     angles = 2 * math.pi * np.random.rand(res[0] + 1, res[1] + 1)
+     gradients = np.stack((np.cos(angles), np.sin(angles)), axis=-1)
+
+     tile_grads = lambda slice1, slice2: np.repeat(
+         np.repeat(gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]], d[0], axis=0), d[1], axis=1)
+     dot = lambda grad, shift: (
+         np.stack((grid[:shape[0], :shape[1], 0] + shift[0], grid[:shape[0], :shape[1], 1] + shift[1]),
+                  axis=-1) * grad[:shape[0], :shape[1]]).sum(axis=-1)
+
+     n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
+     n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
+     n01 = dot(tile_grads([0, -1], [1, None]), [0, -1])
+     n11 = dot(tile_grads([1, None], [1, None]), [-1, -1])
+     t = fade(grid[:shape[0], :shape[1]])
+     return math.sqrt(2) * lerp_np(lerp_np(n00, n10, t[..., 0]), lerp_np(n01, n11, t[..., 0]), t[..., 1])
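A quick sanity check for the Perlin utilities (not part of the commit): sample one mask pair for a 288×288 image with a 36×36 feature grid, passing mask_fg = tensor([1]) to disable foreground gating, as mvec.py does when class_fg is 0:

import torch

from datasets.perlin import perlin_mask

mask_s, mask_l = perlin_mask((3, 288, 288), 288 // 8, 0, 6, torch.tensor([1]), 1)
print(mask_s.shape)  # (36, 36)   pooled feature-level mask
print(mask_l.shape)  # (288, 288) full-resolution mask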
datasets/rayan_dataset.py ADDED
@@ -0,0 +1,127 @@
+ # -----------------------------------------------------------------------------
+ # Do Not Alter This File!
+ # -----------------------------------------------------------------------------
+ # The following code is part of the logic used for loading and evaluating your
+ # output scores. Please DO NOT modify this section, as upon your submission,
+ # the whole evaluation logic will be overwritten by the original code.
+ # -----------------------------------------------------------------------------
+ # If you'd like to make modifications, you can create a completely new Dataset
+ # class or a child class that inherits from this one and use that with your
+ # data loader.
+ # -----------------------------------------------------------------------------
+
+ import os
+ from enum import Enum
+
+ import PIL
+ import torch
+ from torchvision import transforms
+
+ IMAGENET_MEAN = [0.485, 0.456, 0.406]
+ IMAGENET_STD = [0.229, 0.224, 0.225]
+
+
+ class DatasetSplit(Enum):
+     TRAIN = "train"
+     VAL = "val"
+     TEST = "test"
+
+
+ class RayanDataset(torch.utils.data.Dataset):
+     def __init__(
+         self,
+         source,
+         classname,
+         input_size=518,
+         output_size=224,
+         split=DatasetSplit.TEST,
+         external_transform=None,
+         **kwargs,
+     ):
+         super().__init__()
+         self.source = source
+         self.split = split
+         self.classnames_to_use = [classname]
+         self.imgpaths_per_class, self.data_to_iterate = self.get_image_data()
+
+         if external_transform is None:
+             self.transform_img = [
+                 transforms.Resize((input_size, input_size)),
+                 transforms.CenterCrop(input_size),
+                 transforms.ToTensor(),
+                 transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
+             ]
+             self.transform_img = transforms.Compose(self.transform_img)
+         else:
+             self.transform_img = external_transform
+
+         # Output size of the mask has to be of shape: 1×224×224
+         self.transform_mask = [
+             transforms.Resize((output_size, output_size)),
+             transforms.CenterCrop(output_size),
+             transforms.ToTensor(),
+         ]
+         self.transform_mask = transforms.Compose(self.transform_mask)
+         self.output_shape = (1, output_size, output_size)
+
+     def __getitem__(self, idx):
+         classname, anomaly, image_path, mask_path = self.data_to_iterate[idx]
+         image = PIL.Image.open(image_path).convert("RGB")
+         image = self.transform_img(image)
+
+         if self.split == DatasetSplit.TEST and mask_path is not None:
+             mask = PIL.Image.open(mask_path).convert("L")
+             mask = self.transform_mask(mask) > 0
+         else:
+             mask = torch.zeros([*self.output_shape])
+
+         return {
+             "image": image,
+             "mask": mask,
+             "is_anomaly": int(anomaly != "good"),
+             "image_path": image_path,
+         }
+
+     def __len__(self):
+         return len(self.data_to_iterate)
+
+     def get_image_data(self):
+         imgpaths_per_class = {}
+         maskpaths_per_class = {}
+
+         for classname in self.classnames_to_use:
+             classpath = os.path.join(self.source, classname, self.split.value)
+             maskpath = os.path.join(self.source, classname, "ground_truth")
+             anomaly_types = os.listdir(classpath)
+
+             imgpaths_per_class[classname] = {}
+             maskpaths_per_class[classname] = {}
+
+             for anomaly in anomaly_types:
+                 anomaly_path = os.path.join(classpath, anomaly)
+                 anomaly_files = sorted(os.listdir(anomaly_path))
+                 imgpaths_per_class[classname][anomaly] = [
+                     os.path.join(anomaly_path, x) for x in anomaly_files
+                 ]
+
+                 if self.split == DatasetSplit.TEST and anomaly != "good":
+                     anomaly_mask_path = os.path.join(maskpath, anomaly)
+                     anomaly_mask_files = sorted(os.listdir(anomaly_mask_path))
+                     maskpaths_per_class[classname][anomaly] = [
+                         os.path.join(anomaly_mask_path, x) for x in anomaly_mask_files
+                     ]
+                 else:
+                     maskpaths_per_class[classname]["good"] = None
+
+         data_to_iterate = []
+         for classname in sorted(imgpaths_per_class.keys()):
+             for anomaly in sorted(imgpaths_per_class[classname].keys()):
+                 for i, image_path in enumerate(imgpaths_per_class[classname][anomaly]):
+                     data_tuple = [classname, anomaly, image_path]
+                     if self.split == DatasetSplit.TEST and anomaly != "good":
+                         data_tuple.append(maskpaths_per_class[classname][anomaly][i])
+                     else:
+                         data_tuple.append(None)
+                     data_to_iterate.append(data_tuple)
+
+         return imgpaths_per_class, data_to_iterate
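An evaluation-side sketch of the contract RayanDataset enforces (not part of the commit), assuming data at ./data and one of the mapped category names:

from datasets.rayan_dataset import RayanDataset, DatasetSplit

ds = RayanDataset(source="./data", classname="photovoltaic_module",
                  split=DatasetSplit.TEST)
sample = ds[0]
print(sample["image"].shape)  # torch.Size([3, 518, 518])
print(sample["mask"].shape)   # torch.Size([1, 224, 224]) -- the required 1x224x224 mask
print(sample["is_anomaly"], sample["image_path"])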
docker-compose.yml ADDED
@@ -0,0 +1,21 @@
+ # -----------------------------------------------------------------------------
+ # A sample Docker Compose file to help you replicate our test environment
+ # -----------------------------------------------------------------------------
+
+ services:
+   zsad-service:
+     image: zsad-image:1
+     build:
+       context: .
+     container_name: zsad-container
+     volumes:
+       - ./shared_folder:/app/output
+     deploy:
+       resources:
+         reservations:
+           devices:
+             - driver: nvidia
+               count: all
+               capabilities: [gpu]
+
+     command: [ "python3", "runner.py" ]
evaluation/base_eval.py ADDED
@@ -0,0 +1,293 @@
+ # -----------------------------------------------------------------------------
+ # Do Not Alter This File!
+ # -----------------------------------------------------------------------------
+ # The following code is part of the logic used for loading and evaluating your
+ # output scores. Please DO NOT modify this section, as upon your submission,
+ # the whole evaluation logic will be overwritten by the original code.
+ # -----------------------------------------------------------------------------
+
+ import warnings
+ import os
+ from pathlib import Path
+ import csv
+ import json
+ import torch
+
+ import datasets.rayan_dataset as rayan_dataset
+ from evaluation.utils.metrics import compute_metrics
+
+ warnings.filterwarnings("ignore")
+
+
+ class BaseEval:
+     def __init__(self, cfg):
+         self.cfg = cfg
+         self.device = torch.device(
+             "cuda:{}".format(cfg["device"]) if torch.cuda.is_available() else "cpu"
+         )
+
+         self.path = cfg["datasets"]["data_path"]
+         self.dataset = cfg["datasets"]["dataset_name"]
+         self.save_csv = cfg["testing"]["save_csv"]
+         self.save_json = cfg["testing"]["save_json"]
+         self.categories = cfg["datasets"]["class_name"]
+         if isinstance(self.categories, str):
+             if self.categories.lower() == "all":
+                 if self.dataset == "rayan_dataset":
+                     self.categories = self.get_available_class_names(self.path)
+             else:
+                 self.categories = [self.categories]
+         self.output_dir = cfg["testing"]["output_dir"]
+         os.makedirs(self.output_dir, exist_ok=True)
+         self.scores_dir = cfg["testing"]["output_scores_dir"]
+         self.class_name_mapping_dir = cfg["testing"]["class_name_mapping_dir"]
+
+         self.leaderboard_metric_weights = {
+             "image_auroc": 1.2,
+             "image_ap": 1.1,
+             "image_f1": 1.1,
+             "pixel_auroc": 1.0,
+             "pixel_aupro": 1.4,
+             "pixel_ap": 1.3,
+             "pixel_f1": 1.3,
+         }
+
+     def get_available_class_names(self, root_data_path):
+         all_items = os.listdir(root_data_path)
+         folder_names = [
+             item
+             for item in all_items
+             if os.path.isdir(os.path.join(root_data_path, item))
+         ]
+
+         return folder_names
+
+     def load_datasets(self, category):
+         dataset_classes = {
+             "rayan_dataset": rayan_dataset.RayanDataset,
+         }
+
+         dataset_splits = {
+             "rayan_dataset": rayan_dataset.DatasetSplit.TEST,
+         }
+
+         test_dataset = dataset_classes[self.dataset](
+             source=self.path,
+             split=dataset_splits[self.dataset],
+             classname=category,
+         )
+         return test_dataset
+
+     def get_category_metrics(self, category):
+         print(f"Loading scores of '{category}'")
+         gt_sp, pr_sp, gt_px, pr_px, _ = self.load_category_scores(category)
+
+         print(f"Computing metrics for '{category}'")
+         image_metric, pixel_metric = compute_metrics(gt_sp, pr_sp, gt_px, pr_px)
+
+         return image_metric, pixel_metric
+
+     def load_category_scores(self, category):
+         raise NotImplementedError()
+
+     def get_scores_path_for_image(self, image_path):
+         """example image_path: './data/photovoltaic_module/test/good/037.png'"""
+         path = Path(image_path)
+
+         category, split, anomaly_type = path.parts[-4:-1]
+         image_name = path.stem
+
+         return os.path.join(
+             self.scores_dir, category, split, anomaly_type, f"{image_name}_scores.json"
+         )
+
+     def calc_leaderboard_score(self, **metrics):
+         weighted_sum = 0
+         total_weight = 0
+         for key, weight in self.leaderboard_metric_weights.items():
+             metric = metrics.get(key)
+             weighted_sum += metric * weight
+             total_weight += weight
+
+         if total_weight == 0:
+             return 0
+
+         return weighted_sum / total_weight
+
+     def main(self):
+         image_auroc_list = []
+         image_f1_list = []
+         image_ap_list = []
+         pixel_auroc_list = []
+         pixel_f1_list = []
+         pixel_ap_list = []
+         pixel_aupro_list = []
+         leaderboard_score_list = []
+         for category in self.categories:
+             image_metric, pixel_metric = self.get_category_metrics(
+                 category=category,
+             )
+             image_auroc, image_f1, image_ap = image_metric
+             pixel_auroc, pixel_f1, pixel_ap, pixel_aupro = pixel_metric
+             leaderboard_score = self.calc_leaderboard_score(
+                 image_auroc=image_auroc,
+                 image_f1=image_f1,
+                 image_ap=image_ap,
+                 pixel_auroc=pixel_auroc,
+                 pixel_aupro=pixel_aupro,
+                 pixel_f1=pixel_f1,
+                 pixel_ap=pixel_ap,
+             )
+
+             image_auroc_list.append(image_auroc)
+             image_f1_list.append(image_f1)
+             image_ap_list.append(image_ap)
+             pixel_auroc_list.append(pixel_auroc)
+             pixel_f1_list.append(pixel_f1)
+             pixel_ap_list.append(pixel_ap)
+             pixel_aupro_list.append(pixel_aupro)
+             leaderboard_score_list.append(leaderboard_score)
+
+             print(category)
+             print(
+                 "[image level] auroc:{}, f1:{}, ap:{}".format(
+                     image_auroc * 100,
+                     image_f1 * 100,
+                     image_ap * 100,
+                 )
+             )
+             print(
+                 "[pixel level] auroc:{}, f1:{}, ap:{}, aupro:{}".format(
+                     pixel_auroc * 100,
+                     pixel_f1 * 100,
+                     pixel_ap * 100,
+                     pixel_aupro * 100,
+                 )
+             )
+             print(
+                 "leaderboard score:{}".format(
+                     leaderboard_score * 100,
+                 )
+             )
+
+         image_auroc_mean = sum(image_auroc_list) / len(image_auroc_list)
+         image_f1_mean = sum(image_f1_list) / len(image_f1_list)
+         image_ap_mean = sum(image_ap_list) / len(image_ap_list)
+         pixel_auroc_mean = sum(pixel_auroc_list) / len(pixel_auroc_list)
+         pixel_f1_mean = sum(pixel_f1_list) / len(pixel_f1_list)
+         pixel_ap_mean = sum(pixel_ap_list) / len(pixel_ap_list)
+         pixel_aupro_mean = sum(pixel_aupro_list) / len(pixel_aupro_list)
+         leaderboard_score_mean = sum(leaderboard_score_list) / len(
+             leaderboard_score_list
+         )
+
+         print("mean")
+         print(
+             "[image level] auroc:{}, f1:{}, ap:{}".format(
+                 image_auroc_mean * 100, image_f1_mean * 100, image_ap_mean * 100
+             )
+         )
+         print(
+             "[pixel level] auroc:{}, f1:{}, ap:{}, aupro:{}".format(
+                 pixel_auroc_mean * 100,
+                 pixel_f1_mean * 100,
+                 pixel_ap_mean * 100,
+                 pixel_aupro_mean * 100,
+             )
+         )
+         print(
+             "leaderboard score:{}".format(
+                 leaderboard_score_mean * 100,
+             )
+         )
+
+         # Save the final results as a csv file
+         if self.save_csv:
+             with open(self.class_name_mapping_dir, "r") as f:
+                 class_name_mapping_dict = json.load(f)
+             csv_data = [
+                 [
+                     "Category",
+                     "pixel_auroc",
+                     "pixel_f1",
+                     "pixel_ap",
+                     "pixel_aupro",
+                     "image_auroc",
+                     "image_f1",
+                     "image_ap",
+                     "leaderboard_score",
+                 ]
+             ]
+             for i, category in enumerate(self.categories):
+                 csv_data.append(
+                     [
+                         class_name_mapping_dict[category],
+                         pixel_auroc_list[i] * 100,
+                         pixel_f1_list[i] * 100,
+                         pixel_ap_list[i] * 100,
+                         pixel_aupro_list[i] * 100,
+                         image_auroc_list[i] * 100,
+                         image_f1_list[i] * 100,
+                         image_ap_list[i] * 100,
+                         leaderboard_score_list[i] * 100,
+                     ]
+                 )
+             csv_data.append(
+                 [
+                     "mean",
+                     pixel_auroc_mean * 100,
+                     pixel_f1_mean * 100,
+                     pixel_ap_mean * 100,
+                     pixel_aupro_mean * 100,
+                     image_auroc_mean * 100,
+                     image_f1_mean * 100,
+                     image_ap_mean * 100,
+                     leaderboard_score_mean * 100,
+                 ]
+             )
+
+             csv_file_path = os.path.join(self.output_dir, "results.csv")
+             with open(csv_file_path, mode="w", newline="") as file:
+                 writer = csv.writer(file)
+                 writer.writerows(csv_data)
+
+         # Save the final results as a json file
+         if self.save_json:
+             json_data = []
+             with open(self.class_name_mapping_dir, "r") as f:
+                 class_name_mapping_dict = json.load(f)
+             for i, category in enumerate(self.categories):
+                 json_data.append(
+                     {
+                         "Category": class_name_mapping_dict[category],
+                         "pixel_auroc": pixel_auroc_list[i] * 100,
+                         "pixel_f1": pixel_f1_list[i] * 100,
+                         "pixel_ap": pixel_ap_list[i] * 100,
+                         "pixel_aupro": pixel_aupro_list[i] * 100,
+                         "image_auroc": image_auroc_list[i] * 100,
+                         "image_f1": image_f1_list[i] * 100,
+                         "image_ap": image_ap_list[i] * 100,
+                         "leaderboard_score": leaderboard_score_list[i] * 100,
+                     }
+                 )
+             json_data.append(
+                 {
+                     "Category": "mean",
+                     "pixel_auroc": pixel_auroc_mean * 100,
+                     "pixel_f1": pixel_f1_mean * 100,
+                     "pixel_ap": pixel_ap_mean * 100,
+                     "pixel_aupro": pixel_aupro_mean * 100,
+                     "image_auroc": image_auroc_mean * 100,
+                     "image_f1": image_f1_mean * 100,
+                     "image_ap": image_ap_mean * 100,
+                     "leaderboard_score": leaderboard_score_mean * 100,
+                 }
+             )
+
+             json_file_path = os.path.join(self.output_dir, "results.json")
+             with open(json_file_path, mode="w") as file:
+                 final_json = {
+                     "result": leaderboard_score_mean * 100,
+                     "metadata": json_data,
+                 }
+                 json.dump(final_json, file, indent=4)
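The leaderboard score above is simply a weighted mean of the seven metrics. A standalone recomputation (weights copied from leaderboard_metric_weights; the metric values are made up):

weights = {
    "image_auroc": 1.2, "image_ap": 1.1, "image_f1": 1.1,
    "pixel_auroc": 1.0, "pixel_aupro": 1.4, "pixel_ap": 1.3, "pixel_f1": 1.3,
}
metrics = {
    "image_auroc": 0.95, "image_ap": 0.97, "image_f1": 0.93,
    "pixel_auroc": 0.96, "pixel_aupro": 0.88, "pixel_ap": 0.55, "pixel_f1": 0.58,
}
# Weighted mean; the weights sum to 8.4
score = sum(metrics[k] * w for k, w in weights.items()) / sum(weights.values())
print(round(score, 4))  # 0.8204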
evaluation/class_name_mapping.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "pill": "industrial_01",
+     "photovoltaic_module": "industrial_02",
+     "capsules": "industrial_03"
+ }
evaluation/eval_main.py ADDED
@@ -0,0 +1,78 @@
+ # -----------------------------------------------------------------------------
+ # Do Not Alter This File!
+ # -----------------------------------------------------------------------------
+ # The following code is part of the logic used for loading and evaluating your
+ # output scores. Please DO NOT modify this section, as upon your submission,
+ # the whole evaluation logic will be overwritten by the original code.
+ # -----------------------------------------------------------------------------
+
+ import warnings
+ import argparse
+ import os
+ import sys
+
+ sys.path.append(os.getcwd())
+ from evaluation.json_score import JsonScoreEvaluator
+
+ warnings.filterwarnings("ignore")
+
+
+ def get_args():
+     parser = argparse.ArgumentParser(description="Rayan ZSAD Evaluation Code")
+     parser.add_argument("--data_path", type=str, default=None, help="dataset path")
+     parser.add_argument("--dataset_name", type=str, default=None, help="dataset name")
+     parser.add_argument("--class_name", type=str, default=None, help="category")
+     parser.add_argument("--device", type=int, default=None, help="gpu id")
+     parser.add_argument(
+         "--output_dir", type=str, default=None, help="save results path"
+     )
+     parser.add_argument(
+         "--output_scores_dir", type=str, default=None, help="save scores path"
+     )
+     parser.add_argument("--save_csv", type=str, default=None, help="save csv")
+     parser.add_argument("--save_json", type=str, default=None, help="save json")
+
+     parser.add_argument(
+         "--class_name_mapping_dir",
+         type=str,
+         default=None,
+         help="mapping from actual class names to class numbers",
+     )
+     args = parser.parse_args()
+     return args
+
+
+ def load_args(cfg, args):
+     cfg["datasets"]["data_path"] = args.data_path
+     assert os.path.exists(
+         cfg["datasets"]["data_path"]
+     ), f"The dataset path {cfg['datasets']['data_path']} does not exist."
+     cfg["datasets"]["dataset_name"] = args.dataset_name
+     cfg["datasets"]["class_name"] = args.class_name
+     cfg["device"] = args.device
+     if isinstance(cfg["device"], int):
+         cfg["device"] = str(cfg["device"])
+     cfg["testing"]["output_dir"] = args.output_dir
+     cfg["testing"]["output_scores_dir"] = args.output_scores_dir
+     os.makedirs(cfg["testing"]["output_scores_dir"], exist_ok=True)
+
+     cfg["testing"]["class_name_mapping_dir"] = args.class_name_mapping_dir
+     if args.save_csv.lower() == "true":
+         cfg["testing"]["save_csv"] = True
+     else:
+         cfg["testing"]["save_csv"] = False
+
+     if args.save_json.lower() == "true":
+         cfg["testing"]["save_json"] = True
+     else:
+         cfg["testing"]["save_json"] = False
+
+     return cfg
+
+
+ if __name__ == "__main__":
+     args = get_args()
+     cfg = load_args(cfg={"datasets": {}, "testing": {}, "models": {}}, args=args)
+     print(cfg)
+     model = JsonScoreEvaluator(cfg=cfg)
+     model.main()
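For reference, after load_args runs, the cfg handed to JsonScoreEvaluator has this shape (the paths here are hypothetical, matching defaults used elsewhere in the repo):

cfg = {
    "datasets": {
        "data_path": "./data",
        "dataset_name": "rayan_dataset",
        "class_name": "all",
    },
    "testing": {
        "output_dir": "./output",
        "output_scores_dir": "./output_scores",
        "class_name_mapping_dir": "./evaluation/class_name_mapping.json",
        "save_csv": True,
        "save_json": True,
    },
    "models": {},
    "device": "0",
}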
evaluation/json_score.py ADDED
@@ -0,0 +1,98 @@
+ # -----------------------------------------------------------------------------
+ # Do Not Alter This File!
+ # -----------------------------------------------------------------------------
+ # The following code is part of the logic used for loading and evaluating your
+ # output scores. Please DO NOT modify this section, as upon your submission,
+ # the whole evaluation logic will be overwritten by the original code.
+ # -----------------------------------------------------------------------------
+
+ import warnings
+ import numpy as np
+ import torch
+ from tqdm import tqdm
+
+ from evaluation.base_eval import BaseEval
+ from evaluation.utils.json_helpers import json_to_dict
+
+ warnings.filterwarnings("ignore")
+
+
+ class JsonScoreEvaluator(BaseEval):
+     """
+     Evaluates anomaly detection performance based on pre-computed scores stored in JSON files.
+
+     This class extends the BaseEval class and specializes in reading scores from JSON files,
+     computing evaluation metrics, and optionally saving results to CSV or JSON format.
+
+     Notes:
+         - Score files are expected to follow the exact dataset structure.
+           `{category}/{split}/{anomaly_type}/{image_name}_scores.json`
+           e.g., `photovoltaic_module/test/good/037_scores.json`
+         - Score files are expected to be at `self.scores_dir`.
+
+     Example usage:
+         >>> evaluator = JsonScoreEvaluator(cfg)
+         >>> results = evaluator.main()
+     """
+
+     def __init__(self, cfg):
+         super().__init__(cfg)
+
+     def get_scores_for_image(self, image_path):
+         image_scores_path = self.get_scores_path_for_image(image_path)
+         image_scores = json_to_dict(image_scores_path)
+
+         return image_scores
+
+     def load_category_scores(self, category):
+         cls_scores_list = []  # image level prediction
+         anomaly_maps = []  # pixel level prediction
+         gt_list = []  # image level ground truth
+         img_masks = []  # pixel level ground truth
+
+         image_path_list = []
+         test_dataset = self.load_datasets(category)
+         test_dataloader = torch.utils.data.DataLoader(
+             test_dataset,
+             batch_size=1,
+             shuffle=False,
+             num_workers=0,
+             pin_memory=True,
+         )
+
+         for image_info in tqdm(test_dataloader):
+             if not isinstance(image_info, dict):
+                 raise ValueError("Encountered non-dict image in dataloader")
+
+             del image_info["image"]
+
+             image_path = image_info["image_path"][0]
+             image_path_list.extend(image_path)
+
+             img_masks.append(image_info["mask"])
+             gt_list.extend(list(image_info["is_anomaly"].numpy()))
+
+             image_scores = self.get_scores_for_image(image_path)
+             cls_scores = image_scores["img_level_score"]
+             anomaly_maps_iter = image_scores["pix_level_score"]
+
+             cls_scores_list.append(cls_scores)
+             anomaly_maps.append(anomaly_maps_iter)
+
+         pr_sp = np.array(cls_scores_list)
+         gt_sp = np.array(gt_list)
+         pr_px = np.array(anomaly_maps)
+         gt_px = torch.cat(img_masks, dim=0).numpy().astype(np.int32)
+         print(pr_px.shape)
+         assert pr_px.shape[1:] == (
+             1,
+             224,
+             224,
+         ), "Predicted output scores do not meet the expected shape!"
+         assert gt_px.shape[1:] == (
+             1,
+             224,
+             224,
+         ), "Loaded ground truth maps do not meet the expected shape!"
+
+         return gt_sp, pr_sp, gt_px, pr_px, image_path_list
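A sketch of producing one score file the evaluator can read back (not part of the commit); the 1×224×224 array shape matches the asserts above, and the path mirrors get_scores_path_for_image:

import os

import numpy as np

from evaluation.utils.json_helpers import dict_to_json

out_path = "./output_scores/photovoltaic_module/test/good/037_scores.json"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
scores = {
    "img_level_score": 0.73,  # one scalar per image
    "pix_level_score": np.random.rand(1, 224, 224).astype(np.float32),
}
dict_to_json(scores, out_path)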
evaluation/utils/json_helpers.py ADDED
@@ -0,0 +1,46 @@
+ # -----------------------------------------------------------------------------
+ # Do Not Alter This File!
+ # -----------------------------------------------------------------------------
+ # The following code is part of the logic used for loading and evaluating your
+ # output scores. Please DO NOT modify this section, as upon your submission,
+ # the whole evaluation logic will be overwritten by the original code.
+ # -----------------------------------------------------------------------------
+
+ import json
+ import numpy as np
+
+
+ class NumpyEncoder(json.JSONEncoder):
+     """Special json encoder for numpy types"""
+
+     def default(self, obj):
+         if isinstance(obj, np.integer):
+             return int(obj)
+         elif isinstance(obj, np.floating):
+             return float(obj)
+         elif isinstance(obj, np.ndarray):
+             return {
+                 "__ndarray__": obj.tolist(),
+                 "dtype": str(obj.dtype),
+                 "shape": obj.shape,
+             }
+         else:
+             return super(NumpyEncoder, self).default(obj)
+
+
+ def dict_to_json(dct, filename):
+     """Save a dictionary to a JSON file"""
+     with open(filename, "w") as f:
+         json.dump(dct, f, cls=NumpyEncoder)
+
+
+ def json_to_dict(filename):
+     """Load a JSON file and convert it back to a dictionary of NumPy arrays"""
+     with open(filename, "r") as f:
+         dct = json.load(f)
+
+     for k, v in dct.items():
+         if isinstance(v, dict) and "__ndarray__" in v:
+             dct[k] = np.array(v["__ndarray__"], dtype=v["dtype"]).reshape(v["shape"])
+
+     return dct
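A round-trip check for the helpers above (not part of the commit): ndarrays come back with dtype and shape intact.

import numpy as np

from evaluation.utils.json_helpers import dict_to_json, json_to_dict

original = {"img_level_score": 0.5, "pix_level_score": np.zeros((1, 4, 4), dtype=np.float32)}
dict_to_json(original, "/tmp/roundtrip.json")
restored = json_to_dict("/tmp/roundtrip.json")
assert restored["pix_level_score"].shape == (1, 4, 4)
assert restored["pix_level_score"].dtype == np.float32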
evaluation/utils/metrics.py ADDED
@@ -0,0 +1,111 @@
+ # -----------------------------------------------------------------------------
+ # Do Not Alter This File!
+ # -----------------------------------------------------------------------------
+ # The following code is part of the logic used for loading and evaluating your
+ # output scores. Please DO NOT modify this section, as upon your submission,
+ # the whole evaluation logic will be overwritten by the original code.
+ # -----------------------------------------------------------------------------
+
+ import numpy as np
+ from sklearn.metrics import (
+     auc,
+     roc_auc_score,
+     average_precision_score,
+     precision_recall_curve,
+ )
+ from skimage import measure
+ import warnings
+
+
+ # ref: https://github.com/gudovskiy/cflow-ad/blob/master/train.py
+ def cal_pro_score(masks, amaps, max_step=200, expect_fpr=0.3):
+     binary_amaps = np.zeros_like(amaps, dtype=bool)
+     min_th, max_th = amaps.min(), amaps.max()
+     delta = (max_th - min_th) / max_step
+     pros, fprs, ths = [], [], []
+     for th in np.arange(min_th, max_th, delta):
+         binary_amaps[amaps <= th], binary_amaps[amaps > th] = 0, 1
+         pro = []
+         for binary_amap, mask in zip(binary_amaps, masks):
+             for region in measure.regionprops(measure.label(mask)):
+                 tp_pixels = binary_amap[region.coords[:, 0], region.coords[:, 1]].sum()
+                 pro.append(tp_pixels / region.area)
+         inverse_masks = 1 - masks
+         fp_pixels = np.logical_and(inverse_masks, binary_amaps).sum()
+         fpr = fp_pixels / inverse_masks.sum()
+         pros.append(np.array(pro).mean())
+         fprs.append(fpr)
+         ths.append(th)
+     pros, fprs, ths = np.array(pros), np.array(fprs), np.array(ths)
+     idxes = fprs < expect_fpr
+     fprs = fprs[idxes]
+     print("fprs: ", fprs)
+     fprs = (fprs - fprs.min()) / (fprs.max() - fprs.min())
+     pro_auc = auc(fprs, pros[idxes])
+     return pro_auc
+
+
+ def compute_metrics(gt_sp=None, pr_sp=None, gt_px=None, pr_px=None):
+     # classification
+     if (
+         gt_sp is None
+         or pr_sp is None
+         or gt_sp.sum() == 0
+         or gt_sp.sum() == gt_sp.shape[0]
+     ):
+         auroc_sp, f1_sp, ap_sp = 0, 0, 0
+     else:
+         auroc_sp = roc_auc_score(gt_sp, pr_sp)
+         ap_sp = average_precision_score(gt_sp, pr_sp)
+         precisions, recalls, thresholds = precision_recall_curve(gt_sp, pr_sp)
+         f1_scores = (2 * precisions * recalls) / (precisions + recalls)
+         f1_sp = np.max(f1_scores[np.isfinite(f1_scores)])
+
+     # segmentation
+     if gt_px is None or pr_px is None or gt_px.sum() == 0:
+         auroc_px, f1_px, ap_px, aupro = 0, 0, 0, 0
+     else:
+         auroc_px = roc_auc_score(gt_px.ravel(), pr_px.ravel())
+         ap_px = average_precision_score(gt_px.ravel(), pr_px.ravel())
+         precisions, recalls, thresholds = precision_recall_curve(
+             gt_px.ravel(), pr_px.ravel()
+         )
+         f1_scores = (2 * precisions * recalls) / (precisions + recalls)
+         f1_px = np.max(f1_scores[np.isfinite(f1_scores)])
+         aupro = cal_pro_score(gt_px.squeeze(), pr_px.squeeze())
+
+     image_metric = [auroc_sp, f1_sp, ap_sp]
+     pixel_metric = [auroc_px, f1_px, ap_px, aupro]
+
+     return image_metric, pixel_metric
+
+
+ def compute_auroc(labels, scores):
+     """
+     Computes the Area Under the Receiver Operating Characteristic Curve (AUROC).
+
+     Args:
+         labels (list or np.ndarray): True binary labels (0 for normal, 1 for anomaly).
+         scores (list or np.ndarray): Predicted scores or probabilities for the positive class.
+
+     Returns:
+         float: AUROC score. Returns None if AUROC is undefined.
+     """
+     # Convert inputs to numpy arrays
+     labels = np.array(labels)
+     scores = np.array(scores)
+
+     # Ensure that labels are binary
+     unique_labels = np.unique(labels)
+     if set(unique_labels) != {0, 1}:
+         raise ValueError(f"Labels must be binary (0 and 1). Found labels: {unique_labels}")
+
+     # Check if both classes are present
+     if len(unique_labels) < 2:
+         warnings.warn("Only one class present in labels. AUROC is undefined.")
+         return None
+
+     try:
+         auroc = roc_auc_score(labels, scores)
+         return auroc
+     except ValueError as e:
+         warnings.warn(f"Error computing AUROC: {e}")
+         return None
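A toy invocation of compute_metrics (not part of the commit). Note the guards above: image metrics are zeroed when all labels are one class, and cal_pro_score needs at least one labeled defect region.

import numpy as np

from evaluation.utils.metrics import compute_metrics

rng = np.random.default_rng(0)
gt_sp = np.array([0, 0, 1, 1])          # image labels: two good, two anomalous
pr_sp = np.array([0.1, 0.2, 0.8, 0.9])  # image-level scores
gt_px = np.zeros((4, 8, 8), dtype=np.int32)
gt_px[2, 2:5, 2:5] = 1                  # small defect regions
gt_px[3, 5:7, 5:7] = 1
pr_px = rng.random((4, 8, 8)) * 0.3
pr_px[gt_px == 1] += 0.7                # score defect pixels higher
image_metric, pixel_metric = compute_metrics(gt_sp, pr_sp, gt_px, pr_px)
print(image_metric)  # [auroc_sp, f1_sp, ap_sp]
print(pixel_metric)  # [auroc_px, f1_px, ap_px, aupro]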
main.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+
3
+ import os
4
+ import torch
5
+ from torch.utils.data import DataLoader
6
+ from datasets.all_classes_dataset import AllClassesDataset, DatasetSplit
7
+ from models.anomaly_detector import AnomalyDetector
8
+ from utils.dump_scores import DumpScores
9
+ import logging
10
+ import json
11
+ from sklearn.metrics import average_precision_score, roc_auc_score, f1_score
12
+ import numpy as np
13
+ import torch.nn.functional as F
14
+ import random
15
+
16
+ def set_seed(seed: int):
17
+ """
18
+ Set the seed for reproducibility across various libraries.
19
+
20
+ Args:
21
+ seed (int): The seed value to be set.
22
+ """
23
+ random.seed(seed)
24
+ np.random.seed(seed)
25
+ torch.manual_seed(seed)
26
+
27
+ if torch.cuda.is_available():
28
+ torch.cuda.manual_seed(seed)
29
+ torch.cuda.manual_seed_all(seed) # For multi-GPU setups
30
+
31
+ # Ensure deterministic behavior in PyTorch
32
+ torch.backends.cudnn.deterministic = True
33
+ torch.backends.cudnn.benchmark = False
34
+
35
+ # For DataLoader workers
36
+ os.environ['PYTHONHASHSEED'] = str(seed)
37
+
38
+ def worker_init_fn(worker_id):
39
+ """
40
+ Initialize the seed for each DataLoader worker to ensure reproducibility.
41
+
42
+ Args:
43
+ worker_id (int): The worker ID.
44
+ """
45
+ seed = torch.initial_seed()
46
+ np.random.seed(seed % 2**32)
47
+ random.seed(seed % 2**32)
48
+
49
+ def compute_aupro(y_true_pixel, y_scores_pixel, num_thresholds=50):
50
+ """
51
+ Compute Area Under the Per-Region Overlap Curve (AUPRO).
52
+
53
+ Args:
54
+ y_true_pixel (np.ndarray): Ground truth binary masks, shape [N, H, W]
55
+ y_scores_pixel (np.ndarray): Predicted anomaly scores, shape [N, H, W]
56
+ num_thresholds (int): Number of thresholds to evaluate.
57
+
58
+ Returns:
59
+ float: AUPRO score.
60
+ """
61
+ # Define thresholds
62
+ thresholds = np.linspace(0, 1, num_thresholds)
63
+
64
+ # Initialize list to store overlaps
65
+ overlaps = []
66
+
67
+ for thresh in thresholds:
68
+ # Binarize predictions
69
+ y_pred = (y_scores_pixel >= thresh).astype(int)
70
+
71
+ # Compute Intersection over Union (IoU) for each sample
72
+ ious = []
73
+ for gt, pred in zip(y_true_pixel, y_pred):
74
+ intersection = np.logical_and(gt, pred).sum()
75
+ union = np.logical_or(gt, pred).sum()
76
+ if union == 0:
77
+ iou = 1.0 # If both gt and pred are all zeros
78
+ else:
79
+ iou = intersection / union
80
+ ious.append(iou)
81
+
82
+ # Average IoU over all samples
83
+ avg_iou = np.mean(ious)
84
+ overlaps.append(avg_iou)
85
+
86
+ # Compute the area under the overlap curve
87
+ aupro = np.trapz(overlaps, thresholds) / np.trapz([1] * len(thresholds), thresholds) # Normalize
88
+ return aupro
89
+
90
+
91
+ def compute_metrics(y_true_image, y_scores_image, y_true_pixel, y_scores_pixel):
92
+ """
93
+ Compute the required metrics based on true labels and predicted scores.
94
+
95
+ Args:
96
+ y_true_image (np.ndarray): Ground truth image labels, shape [N]
97
+ y_scores_image (np.ndarray): Predicted image scores, shape [N]
98
+ y_true_pixel (np.ndarray): Ground truth pixel masks, shape [N, H, W]
99
+ y_scores_pixel (np.ndarray): Predicted pixel anomaly scores, shape [N, H, W]
100
+
101
+ Returns:
102
+ dict: Dictionary containing computed metrics.
103
+ """
104
+ # Check image-level consistency
105
+ if len(y_true_image) != len(y_scores_image):
106
+ raise ValueError(f"Image-level y_true and y_scores have different lengths: {len(y_true_image)} vs {len(y_scores_image)}")
107
+
108
+ # Check pixel-level consistency
109
+ if y_true_pixel.shape != y_scores_pixel.shape:
110
+ raise ValueError(f"Pixel-level y_true and y_scores have different shapes: {y_true_pixel.shape} vs {y_scores_pixel.shape}")
111
+
112
+ # Image-level Metrics
113
+ image_ap = average_precision_score(y_true_image, y_scores_image)
114
+ image_auroc = roc_auc_score(y_true_image, y_scores_image)
115
+ y_pred_image = (y_scores_image >= 0.5).astype(int)
116
+ image_f1 = f1_score(y_true_image, y_pred_image)
117
+
118
+ # Pixel-level Metrics
119
+ pixel_ap = average_precision_score(y_true_pixel.flatten(), y_scores_pixel.flatten())
120
+ pixel_auroc = roc_auc_score(y_true_pixel.flatten(), y_scores_pixel.flatten())
121
+ pixel_aupro = compute_aupro(y_true_pixel, y_scores_pixel)
122
+ y_pred_pixel = (y_scores_pixel >= 0.5).astype(int)
123
+ pixel_f1 = f1_score(y_true_pixel.flatten(), y_pred_pixel.flatten())
124
+
125
+ # Compute leaderboard_score as a weighted average (example weights)
126
+ # Adjust weights as per your specific requirements
127
+ leaderboard_score = (
128
+ 0.25 * image_auroc +
129
+ 0.25 * image_f1 +
130
+ 0.25 * pixel_auroc +
131
+ 0.25 * pixel_f1
132
+ )
133
+
134
+ metrics = {
135
+ "image_metrics": {
136
+ "image_ap": round(float(image_ap), 4),
137
+ "image_auroc": round(float(image_auroc), 4),
138
+ "image_f1": round(float(image_f1), 4)
139
+ },
140
+ "pixel_metrics": {
141
+ "pixel_ap": round(float(pixel_ap), 4),
142
+ "pixel_aupro": round(float(pixel_aupro), 4),
143
+ "pixel_auroc": round(float(pixel_auroc), 4),
144
+ "pixel_f1": round(float(pixel_f1), 4)
145
+ },
146
+ "overall_metric": {
147
+ "leaderboard_score": round(float(leaderboard_score), 4)
148
+ }
149
+ }
150
+
151
+ return metrics
152
+
153
+
+ 
+ 
+ def get_class_name(image_path, source_dir):
+     """
+     Extract the class name from the image path.
+ 
+     Args:
+         image_path (str): Path to the image file.
+         source_dir (str): Root source directory.
+ 
+     Returns:
+         str: Class name.
+     """
+     # Example image_path: "./data/pill/test/broken/image1.png"
+     rel_path = os.path.relpath(image_path, source_dir)  # "pill/test/broken/image1.png"
+     parts = rel_path.split(os.sep)
+     if len(parts) < 2:
+         raise ValueError(f"Unexpected image path format: {image_path}")
+     class_name = parts[0]  # "pill"
+     return class_name
+ 
+ 
+ def main():
+     SEED = 41  # Any fixed integer works; it just has to stay constant across runs
+     set_seed(SEED)
+     # Configure logging
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ 
+     # Configuration
+     source_dir = "./data"
+     output_scores_dir = "./output_scores"
+     split = DatasetSplit.TEST  # Use the Enum instead of a raw string
+     device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ 
+     logging.info("Initializing the dataset and dataloader...")
+ 
+     # Initialize the dataset; the default output_size=224 matches the upsampled anomaly maps
+     dataset = AllClassesDataset(
+         source=source_dir,
+         split=split,
+         # output_size=17  # Alternatively, match the raw anomaly_map resolution
+     )
+     # The loop below reads each batch as a single sample, so batch_size must be 1
+     dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
+ 
+     logging.info("Initializing the anomaly detector...")
+     # Initialize anomaly detector
+     detector = AnomalyDetector(device=device)
+ 
+     # Initialize DumpScores
+     dump_scores = DumpScores(output_dir=output_scores_dir)
+ 
+     logging.info("Starting anomaly detection inference...")
+     # Initialize containers for metrics
+     classes = dataset.get_all_class_names()
+     metrics_data = {cls: {
+         "y_true_image": [],
+         "y_scores_image": [],
+         "y_true_pixel": [],
+         "y_scores_pixel": []
+     } for cls in classes}
+ 
+     # Iterate through the dataset
+     for batch_idx, batch in enumerate(dataloader):
+         image = batch['image'].squeeze(0)  # Shape: [3, H, W]
+         mask = batch['mask'].squeeze(1).numpy()  # Drop the channel dimension: [1, 224, 224]
+         image_label = batch['is_anomaly'].item()  # 1 or 0
+         image_path = batch['image_path'][0]  # batch_size is 1
+ 
+         # Extract class name from image_path
+         try:
+             class_name = get_class_name(image_path, source_dir)
+         except ValueError as e:
+             logging.error(f"Error extracting class name: {e}")
+             continue  # Skip this sample
+ 
+         # Extract features and compute scores using GLASS
+         image_score, anomaly_map = detector.extract_features(image, "all")
+ 
+         # Compute pixel-level anomaly score (already normalized)
+         pixel_score = detector.compute_pixel_score(anomaly_map).squeeze()
+ 
+         # Add batch and channel dimensions: [1, 1, 17, 17]
+         pixel_score_tensor = torch.from_numpy(pixel_score).float().unsqueeze(0).unsqueeze(0).to(device)
+ 
+         # Upsample pixel_score to (224, 224)
+         # Option 1: PyTorch interpolation
+         pixel_score = F.interpolate(
+             pixel_score_tensor,
+             size=(224, 224),
+             mode='bilinear',
+             align_corners=False
+         ).squeeze(0).cpu().numpy()  # Drop the batch dimension, keeping [1, 224, 224]
+ 
+         # Option 2: OpenCV (uncomment if preferred)
+         # pixel_score = cv2.resize(
+         #     pixel_score,
+         #     dsize=(224, 224),
+         #     interpolation=cv2.INTER_LINEAR
+         # )
+ 
+         # Optional: verify the upsampled pixel_score shape
+         # if pixel_score.shape != (1, 224, 224):
+         #     logging.warning(
+         #         f"Upsampled pixel score shape mismatch for image {image_path}: "
+         #         f"expected (1, 224, 224), got {pixel_score.shape}")
+         #     continue  # Skip this sample
+ 
+         # Append to metrics_data
+         metrics_data[class_name]["y_true_image"].append(image_label)
+         metrics_data[class_name]["y_scores_image"].append(image_score)
+         metrics_data[class_name]["y_true_pixel"].append(mask)
+         metrics_data[class_name]["y_scores_pixel"].append(pixel_score)
+ 
+         # Save individual image scores
+         dump_scores.save_scores([image_path], [image_score], [pixel_score])
+ 
+         logging.info(f"[{batch_idx + 1}/{len(dataloader)}] Processed image: {image_path}")
+         logging.info(f"Image-level score: {image_score:.4f}")
+         logging.info(f"Pixel-level mean score: {pixel_score.mean():.4f}")
+ 
+     logging.info("Anomaly detection inference completed. Computing metrics...")
+ 
+     # Dictionary to hold metrics per class
+     classes_metrics = {}
+ 
+     for cls in classes:
+         y_true_image = np.array(metrics_data[cls]["y_true_image"])
+         y_scores_image = np.array(metrics_data[cls]["y_scores_image"])
+         y_true_pixel = np.array(metrics_data[cls]["y_true_pixel"])
+         y_scores_pixel = np.array(metrics_data[cls]["y_scores_pixel"])
+ 
+         # Skip classes without any samples
+         if len(y_true_image) == 0:
+             logging.warning(f"No samples found for class {cls}. Skipping metric computation.")
+             continue
+ 
+         try:
+             metrics = compute_metrics(y_true_image, y_scores_image, y_true_pixel, y_scores_pixel)
+             classes_metrics[cls] = metrics
+             logging.info(f"Metrics computed for class: {cls}")
+         except Exception as e:
+             logging.error(f"Failed to compute metrics for class {cls}: {e}")
+ 
+     # Save metrics to JSON
+     os.makedirs(output_scores_dir, exist_ok=True)
+     metrics_json_path = os.path.join(output_scores_dir, "metrics.json")
+     try:
+         with open(metrics_json_path, "w") as f:
+             json.dump(classes_metrics, f, indent=4)
+         logging.info(f"Metrics successfully saved to {metrics_json_path}")
+     except Exception as e:
+         logging.error(f"Failed to save metrics to {metrics_json_path}: {e}")
+ 
+ 
+ if __name__ == "__main__":
+     main()
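The bilinear upsampling inside the loop is where the 17x17 patch grid becomes a 224x224 pixel map, so it is worth checking in isolation. A minimal standalone sketch with a random stand-in map (bilinear interpolation is a convex combination of neighbors, so values stay inside the input's range):

import torch
import torch.nn.functional as F

patch_map = torch.rand(1, 1, 17, 17)  # stand-in for a normalized anomaly map
pixel_map = F.interpolate(patch_map, size=(224, 224), mode='bilinear', align_corners=False)
print(pixel_map.shape)  # torch.Size([1, 1, 224, 224])
print(float(pixel_map.min()) >= 0.0, float(pixel_map.max()) <= 1.0)  # True True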
models/anomaly_detector.py ADDED
@@ -0,0 +1,186 @@
+ # models/anomaly_detector.py
+ 
+ import torch
+ import torch.nn.functional as F
+ import numpy as np
+ from .glass import GLASS  # Ensure correct import
+ import os
+ import logging
+ from torchvision import models
+ 
+ LOGGER = logging.getLogger(__name__)
+ 
+ 
+ class AnomalyDetector:
+     def __init__(self, device='cuda'):
+         self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
+ 
+         # Initialize the backbone (ResNet-50) without pretrained weights;
+         # weights=None is the non-deprecated spelling of pretrained=False
+         backbone = models.resnet50(weights=None)
+ 
+         # Load backbone weights from a local file
+         backbone_weights_path = './backbones/resnet50_backbone.pth'  # Update this path as needed
+         if os.path.exists(backbone_weights_path):
+             LOGGER.info(f"Loading ResNet-50 backbone weights from '{backbone_weights_path}'")
+             checkpoint = torch.load(backbone_weights_path, map_location="cpu")
+             try:
+                 backbone.load_state_dict(checkpoint, strict=True)
+                 LOGGER.info("ResNet-50 backbone weights loaded successfully.")
+             except RuntimeError as e:
+                 LOGGER.error(f"Error loading ResNet-50 backbone state_dict: {e}")
+                 raise
+         else:
+             LOGGER.error(f"Backbone weights not found at '{backbone_weights_path}'")
+             raise FileNotFoundError(f"Backbone weights not found at '{backbone_weights_path}'")
+ 
+         # Initialize the GLASS model
+         self.glass = GLASS(device=self.device)
+ 
+         # Define parameters for GLASS.load() to match training
+         layers_to_extract_from = ['layer4']  # Extract only the last layer
+         input_shape = (3, 224, 224)  # Match training input shape
+         pretrain_embed_dimension = 2048  # Channel dimension of 'layer4' in ResNet-50
+         target_embed_dimension = 1024  # Match training target dimension
+ 
+         # Initialize GLASS with parameters consistent with training
+         self.glass.load(
+             backbone=backbone,
+             layers_to_extract_from=layers_to_extract_from,
+             device=self.device,
+             input_shape=input_shape,
+             pretrain_embed_dimension=pretrain_embed_dimension,
+             target_embed_dimension=target_embed_dimension,
+             patchsize=3,
+             patchstride=1,
+             meta_epochs=640,  # Not relevant for inference but required by load()
+             eval_epochs=1,
+             dsc_layers=2,
+             dsc_hidden=1024,
+             dsc_margin=0.5,
+             train_backbone=False,
+             pre_proj=1,
+             mining=1,
+             noise=0.015,
+             radius=0.75,
+             p=0.5,
+             lr=0.0001,
+             svd=0,
+             step=20,
+             limit=392,
+         )
+ 
+         # Set model directories
+         model_dir = "./models"  # Base directory for models
+         dataset_name = "rayan_dataset"  # Example dataset name
+         self.glass.set_model_dir(model_dir, dataset_name)
+ 
+         self.glass.to(self.device)
+         self.glass.eval()  # Set GLASS to evaluation mode
+ 
+         # Cache to keep track of classes whose weights have been loaded
+         self.loaded_classes = set()
+ 
+     def load_model_weights(self, model_dir, classname):
+         """
+         Load the saved model weights for a specific class.
+ 
+         Args:
+             model_dir (str): Base directory where models are saved.
+             classname (str): The class name whose model weights to load.
+         """
+         checkpoint_path = os.path.join(model_dir, classname, f"best_model_{classname}.pth")
+         if os.path.exists(checkpoint_path):
+             LOGGER.info(f"Loading model weights from '{checkpoint_path}' for class '{classname}'")
+             checkpoint = torch.load(checkpoint_path, map_location=self.device)
+             try:
+                 self.glass.load_state_dict(checkpoint, strict=True)
+                 LOGGER.info(f"Model weights loaded successfully for class '{classname}'")
+             except RuntimeError as e:
+                 LOGGER.error(f"Error loading state_dict for class '{classname}': {e}")
+                 raise
+         else:
+             LOGGER.error(f"Checkpoint not found at '{checkpoint_path}' for class '{classname}'")
+             raise FileNotFoundError(f"Checkpoint not found at '{checkpoint_path}' for class '{classname}'")
+ 
+     def extract_features(self, image, classname):
+         """
+         Use GLASS to extract features and generate anomaly scores for a specific class.
+ 
+         Args:
+             image (torch.Tensor): Image tensor of shape [3, H, W].
+             classname (str): The class name for which to perform anomaly detection.
+ 
+         Returns:
+             tuple: (image_score, anomaly_map)
+         """
+         # Load model weights for classname if not already loaded
+         # if classname not in self.loaded_classes:
+         #     try:
+         #         self.load_model_weights(model_dir="./models", classname=classname)
+         #         self.loaded_classes.add(classname)
+         #     except FileNotFoundError as e:
+         #         LOGGER.error(f"Failed to load model weights for class '{classname}': {e}")
+         #         raise
+ 
+         # Reshape image to include batch dimension
+         image = image.unsqueeze(0).to(self.device)  # Shape: [1, 3, H, W]
+ 
+         # Use GLASS to get embeddings
+         with torch.no_grad():
+             patch_features, patch_shapes = self.glass._embed(image, evaluation=True)
+             if self.glass.pre_proj > 0:
+                 patch_features = self.glass.pre_projection(patch_features)
+                 # Handle if pre_projection returns multiple outputs
+                 if isinstance(patch_features, (tuple, list)):
+                     patch_features = patch_features[0]
+ 
+             # Pass through discriminator to get anomaly scores
+             patch_scores = self.glass.discriminator(patch_features)
+             patch_scores = self.glass.patch_maker.unpatch_scores(patch_scores, batchsize=image.shape[0])
+ 
+         # Select the last layer's patch_shapes (only one layer now)
+         last_patch_shape = patch_shapes[-1]  # Should be [17, 17] for 518x518 inputs
+ 
+         # Ensure that last_patch_shape is a list or tuple of two integers
+         if isinstance(last_patch_shape, (list, tuple)) and len(last_patch_shape) == 2:
+             # Reshape patch_scores to [batch_size, H_patches, W_patches]:
+             # first squeeze the trailing score dimension ...
+             patch_scores = patch_scores.squeeze(-1)  # Shape: [1, 289]
+             # ... then fold the flat patch axis back into a grid
+             patch_scores = patch_scores.reshape(image.shape[0], *last_patch_shape)  # [1, 17, 17]
+         else:
+             LOGGER.error(f"Unexpected patch_shapes format: {patch_shapes}")
+             raise ValueError(f"Unexpected patch_shapes format: {patch_shapes}")
+ 
+         # Compute image-level score (here: mean of patch scores)
+         image_score = patch_scores.mean().item()
+ 
+         # The anomaly map is the patch score grid itself, clipped to [0, 1]
+         anomaly_map = patch_scores.cpu().numpy()
+         anomaly_map = np.clip(anomaly_map, 0, 1)
+ 
+         # Log anomaly map statistics for debugging
+         LOGGER.info(f"Anomaly map stats for class '{classname}': min={anomaly_map.min():.4f}, max={anomaly_map.max():.4f}, mean={anomaly_map.mean():.4f}")
+ 
+         return image_score, anomaly_map
+ 
+     def compute_pixel_score(self, anomaly_map):
+         """
+         Process the anomaly map for pixel-level evaluation.
+ 
+         Args:
+             anomaly_map (np.ndarray): Anomaly map of shape [1, 17, 17].
+ 
+         Returns:
+             np.ndarray: Min-max normalized anomaly map of the same shape.
+         """
+         # Normalize anomaly_map to [0, 1]
+         min_val = anomaly_map.min()
+         max_val = anomaly_map.max()
+         if max_val - min_val < 1e-8:
+             LOGGER.warning("Anomaly map has zero variance. Returning zero map.")
+             return np.zeros_like(anomaly_map)
+         anomaly_map = (anomaly_map - min_val) / (max_val - min_val + 1e-8)
+         return anomaly_map
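The per-map min-max normalization in compute_pixel_score is easy to verify in isolation; a minimal sketch with a random stand-in map:

import numpy as np

amap = np.random.rand(1, 17, 17).astype(np.float32)  # stand-in anomaly map
lo, hi = amap.min(), amap.max()
if hi - lo < 1e-8:
    norm = np.zeros_like(amap)  # degenerate, constant map
else:
    norm = (amap - lo) / (hi - lo + 1e-8)
assert norm.min() >= 0.0 and norm.max() <= 1.0

One design caveat: normalizing each map against its own min and max discards the absolute score scale, so even a defect-free image ends up with pixel scores spanning [0, 1].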
models/common.py ADDED
@@ -0,0 +1,154 @@
+ # common.py
+ 
+ import copy
+ import numpy as np
+ import scipy.ndimage as ndimage
+ import torch
+ import torch.nn.functional as F
+ from torch import nn
+ 
+ 
+ class Preprocessing(torch.nn.Module):
+     def __init__(self, input_dims, output_dim):
+         super(Preprocessing, self).__init__()
+         self.input_dims = input_dims
+         self.output_dim = output_dim
+ 
+         self.preprocessing_modules = torch.nn.ModuleList()
+         for _ in input_dims:
+             module = MeanMapper(output_dim)
+             self.preprocessing_modules.append(module)
+ 
+     def forward(self, features):
+         _features = []
+         for module, feature in zip(self.preprocessing_modules, features):
+             _features.append(module(feature))
+         return torch.stack(_features, dim=1)
+ 
+ 
+ class MeanMapper(torch.nn.Module):
+     def __init__(self, preprocessing_dim):
+         super(MeanMapper, self).__init__()
+         self.preprocessing_dim = preprocessing_dim
+ 
+     def forward(self, features):
+         features = features.reshape(len(features), 1, -1)
+         return F.adaptive_avg_pool1d(features, self.preprocessing_dim).squeeze(1)
+ 
+ 
+ class Aggregator(torch.nn.Module):
+     def __init__(self, target_dim):
+         super(Aggregator, self).__init__()
+         self.target_dim = target_dim
+ 
+     def forward(self, features):
+         """Returns reshaped and average pooled features."""
+         features = features.reshape(len(features), 1, -1)
+         features = F.adaptive_avg_pool1d(features, self.target_dim)
+         return features.reshape(len(features), -1)
+ 
+ 
+ class RescaleSegmentor:
+     def __init__(self, device, target_size=288):
+         self.device = device
+         self.target_size = target_size
+         self.smoothing = 4
+ 
+     def convert_to_segmentation(self, patch_scores):
+         with torch.no_grad():
+             if isinstance(patch_scores, np.ndarray):
+                 patch_scores = torch.from_numpy(patch_scores)
+             _scores = patch_scores.to(self.device)
+             _scores = _scores.unsqueeze(1)
+             _scores = F.interpolate(
+                 _scores, size=self.target_size, mode="bilinear", align_corners=False
+             )
+             _scores = _scores.squeeze(1)
+             patch_scores = _scores.cpu().numpy()
+         return [ndimage.gaussian_filter(patch_score, sigma=self.smoothing) for patch_score in patch_scores]
+ 
+ 
+ class NetworkFeatureAggregator(torch.nn.Module):
+     """Efficient extraction of network features.
+ 
+     Runs a network only up to the last layer in the list of layers from
+     which features should be extracted.
+ 
+     Args:
+         backbone: torchvision.model
+         layers_to_extract_from: [list of str]
+     """
+ 
+     def __init__(self, backbone, layers_to_extract_from, device, train_backbone=False):
+         super(NetworkFeatureAggregator, self).__init__()
+         self.layers_to_extract_from = layers_to_extract_from
+         self.backbone = backbone
+         self.device = device
+         self.train_backbone = train_backbone
+         if not hasattr(backbone, "hook_handles"):
+             self.backbone.hook_handles = []
+         for handle in self.backbone.hook_handles:
+             handle.remove()
+         self.outputs = {}
+ 
+         for extract_layer in layers_to_extract_from:
+             self.register_hook(extract_layer)
+ 
+         self.to(self.device)
+ 
+     def forward(self, images, eval=True):
+         self.outputs.clear()
+         if self.train_backbone and not eval:
+             self.backbone.train()
+             self.backbone(images)
+         else:
+             self.backbone.eval()
+             with torch.no_grad():
+                 self.backbone(images)
+         return self.outputs
+ 
+     def feature_dimensions(self, input_shape):
+         """Computes the feature dimensions for all layers given input_shape."""
+         _input = torch.ones([1] + list(input_shape)).to(self.device)
+         _output = self(_input)
+         return [_output[layer].shape[1] for layer in self.layers_to_extract_from]
+ 
+     def register_hook(self, layer_name):
+         module = self.find_module(self.backbone, layer_name)
+         if module is not None:
+             forward_hook = ForwardHook(self.outputs, layer_name, self.layers_to_extract_from[-1])
+             if isinstance(module, torch.nn.Sequential):
+                 hook = module[-1].register_forward_hook(forward_hook)
+             else:
+                 hook = module.register_forward_hook(forward_hook)
+             self.backbone.hook_handles.append(hook)
+         else:
+             raise ValueError(f"Module {layer_name} not found in the model")
+ 
+     def find_module(self, model, module_name):
+         for name, module in model.named_modules():
+             if name == module_name:
+                 return module
+             elif '.' in module_name:
+                 father, child = module_name.split('.', 1)
+                 if name == father:
+                     return self.find_module(module, child)
+         return None
+ 
+ 
+ class ForwardHook:
+     def __init__(self, hook_dict, layer_name: str, last_layer_to_extract: str):
+         self.hook_dict = hook_dict
+         self.layer_name = layer_name
+         self.raise_exception_to_break = copy.deepcopy(
+             layer_name == last_layer_to_extract
+         )
+ 
+     def __call__(self, module, input, output):
+         self.hook_dict[self.layer_name] = output
+         return None
+ 
+ 
+ class LastLayerToExtractReachedException(Exception):
+     pass
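NetworkFeatureAggregator's hook machinery reduces to the stock register_forward_hook API. A self-contained sketch of the same idea on a bare ResNet-50 (random weights, so the values are meaningless but the shapes are representative):

import torch
from torchvision import models

outputs = {}
net = models.resnet50(weights=None).eval()
handle = net.layer4.register_forward_hook(
    lambda module, inputs, output: outputs.__setitem__('layer4', output)
)
with torch.no_grad():
    net(torch.randn(1, 3, 224, 224))
print(outputs['layer4'].shape)  # torch.Size([1, 2048, 7, 7])
handle.remove()  # mirror the handle bookkeeping done in __init__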
models/glass.py ADDED
@@ -0,0 +1,372 @@
+ # models/glass.py
+ 
+ import logging
+ import math
+ import os
+ import torch
+ import numpy as np
+ import torch.nn as nn
+ from torch.cuda.amp import GradScaler, autocast
+ from .common import NetworkFeatureAggregator, Preprocessing, Aggregator, RescaleSegmentor
+ import torch.nn.functional as F
+ from torch.utils.tensorboard import SummaryWriter
+ import torch.optim as optim
+ from .model import Discriminator, Projection, PatchMaker
+ 
+ LOGGER = logging.getLogger(__name__)
+ IMAGENET_MEAN = [0.485, 0.456, 0.406]
+ IMAGENET_STD = [0.229, 0.224, 0.225]
+ 
+ 
+ class TBWrapper:
+     def __init__(self, log_dir):
+         self.g_iter = 0
+         self.logger = SummaryWriter(log_dir=log_dir)
+ 
+     def step(self):
+         self.g_iter += 1
+ 
+     def log(self, tag, value, step):
+         self.logger.add_scalar(tag, value, step)
+ 
+     def close(self):
+         # trainer() calls close() on this wrapper, so forward it to the writer
+         self.logger.close()
+ 
+ 
+ class GLASS(torch.nn.Module):
+     def __init__(self, device):
+         super(GLASS, self).__init__()
+         self.device = device
+ 
+     def load(
+         self,
+         backbone,
+         layers_to_extract_from,
+         device,
+         input_shape,
+         pretrain_embed_dimension,
+         target_embed_dimension,
+         patchsize=3,
+         patchstride=1,
+         meta_epochs=640,
+         eval_epochs=1,
+         dsc_layers=2,
+         dsc_hidden=1024,
+         dsc_margin=0.5,
+         train_backbone=False,  # Set externally by the caller
+         pre_proj=1,
+         mining=1,
+         noise=0.015,
+         radius=0.75,
+         p=0.5,
+         lr=0.0001,
+         svd=0,
+         step=20,
+         limit=392,
+         **kwargs,
+     ):
+         self.backbone = backbone.to(device)
+         self.layers_to_extract_from = layers_to_extract_from
+         self.input_shape = input_shape
+         self.device = device
+ 
+         self.forward_modules = torch.nn.ModuleDict({})
+         feature_aggregator = NetworkFeatureAggregator(
+             self.backbone, self.layers_to_extract_from, self.device, train_backbone
+         )
+         feature_dimensions = feature_aggregator.feature_dimensions(input_shape)
+         self.forward_modules["feature_aggregator"] = feature_aggregator
+ 
+         preprocessing = Preprocessing(feature_dimensions, pretrain_embed_dimension)
+         self.forward_modules["preprocessing"] = preprocessing
+         self.target_embed_dimension = target_embed_dimension
+         preadapt_aggregator = Aggregator(target_dim=target_embed_dimension)
+         preadapt_aggregator.to(self.device)
+         self.forward_modules["preadapt_aggregator"] = preadapt_aggregator
+ 
+         self.meta_epochs = meta_epochs
+         self.lr = lr
+         self.train_backbone = train_backbone
+         if self.train_backbone:
+             self.backbone_opt = torch.optim.AdamW(self.forward_modules["feature_aggregator"].backbone.parameters(), lr)
+ 
+         self.pre_proj = pre_proj
+         if self.pre_proj > 0:
+             self.pre_projection = Projection(self.target_embed_dimension, self.target_embed_dimension, pre_proj)
+             self.pre_projection.to(self.device)
+             self.proj_opt = torch.optim.Adam(self.pre_projection.parameters(), lr, weight_decay=1e-5)
+ 
+         self.eval_epochs = eval_epochs
+         self.dsc_layers = dsc_layers
+         self.dsc_hidden = dsc_hidden
+         self.discriminator = Discriminator(self.target_embed_dimension, n_layers=dsc_layers, hidden=dsc_hidden)
+         self.discriminator.to(self.device)
+         self.dsc_opt = torch.optim.AdamW(self.discriminator.parameters(), lr=lr * 2)
+         self.dsc_margin = dsc_margin
+ 
+         self.c = torch.tensor(0)
+         self.c_ = torch.tensor(0)
+         self.p = p
+         self.radius = radius
+         self.mining = mining
+         self.noise = noise
+         self.svd = svd
+         self.step = step
+         self.limit = limit
+         self.distribution = 0
+ 
+         # MSELoss in place of the original FocalLoss
+         self.loss_fn = nn.MSELoss()
+ 
+         self.patch_maker = PatchMaker(patchsize, stride=patchstride)
+         self.anomaly_segmentor = RescaleSegmentor(device=self.device, target_size=input_shape[-2:])
+         self.model_dir = ""
+         self.dataset_name = ""
+         self.logger = None
+ 
+     def set_model_dir(self, model_dir, dataset_name):
+         self.model_dir = model_dir
+         os.makedirs(self.model_dir, exist_ok=True)
+         self.ckpt_dir = os.path.join(self.model_dir, dataset_name)
+         os.makedirs(self.ckpt_dir, exist_ok=True)
+         self.tb_dir = os.path.join(self.ckpt_dir, "tb")
+         os.makedirs(self.tb_dir, exist_ok=True)
+         self.logger = TBWrapper(self.tb_dir)
+ 
+     def _embed(self, images, detach=True, provide_patch_shapes=False, evaluation=False):
+         """Returns feature embeddings for images."""
+         images = images.float()  # Ensure input tensor is float32
+         if not evaluation and self.train_backbone:
+             self.forward_modules["feature_aggregator"].train()
+             features = self.forward_modules["feature_aggregator"](images, eval=evaluation)
+         else:
+             self.forward_modules["feature_aggregator"].eval()
+             with torch.no_grad():
+                 features = self.forward_modules["feature_aggregator"](images)
+ 
+         features = [features[layer] for layer in self.layers_to_extract_from]
+ 
+         for i, feat in enumerate(features):
+             if len(feat.shape) == 3:
+                 # Transformer-style output [B, L, C]: fold back into a square grid
+                 B, L, C = feat.shape
+                 sqrt_L = int(math.sqrt(L))
+                 if sqrt_L * sqrt_L != L:
+                     raise ValueError(f"Layer {self.layers_to_extract_from[i]} output has non-square spatial dimensions: {feat.shape}")
+                 features[i] = feat.reshape(B, sqrt_L, sqrt_L, C).permute(0, 3, 1, 2)
+             # Debug statement; gradients are only expected while training the backbone
+             if not evaluation and self.train_backbone:
+                 assert features[i].requires_grad, f"Feature {i} from layer {self.layers_to_extract_from[i]} does not require grad."
+ 
+         features = [self.patch_maker.patchify(x, return_spatial_info=True) for x in features]
+         patch_shapes = [x[1] for x in features]
+         patch_features = [x[0] for x in features]
+         ref_num_patches = patch_shapes[0]
+ 
+         for i in range(1, len(patch_features)):
+             _features = patch_features[i]
+             patch_dims = patch_shapes[i]
+ 
+             _features = _features.reshape(
+                 _features.shape[0], patch_dims[0], patch_dims[1], *_features.shape[2:]
+             )
+             _features = _features.permute(0, 3, 4, 5, 1, 2)
+             perm_base_shape = _features.shape
+             _features = _features.reshape(-1, *_features.shape[-2:])
+             _features = F.interpolate(
+                 _features.unsqueeze(1),
+                 size=(ref_num_patches[0], ref_num_patches[1]),
+                 mode="bilinear",
+                 align_corners=False,
+             )
+             _features = _features.squeeze(1)
+             _features = _features.reshape(
+                 *perm_base_shape[:-2], ref_num_patches[0], ref_num_patches[1]
+             )
+             _features = _features.permute(0, 4, 5, 1, 2, 3)
+             _features = _features.reshape(len(_features), -1, *_features.shape[-3:])
+             patch_features[i] = _features
+ 
+         patch_features = [x.reshape(-1, *x.shape[-3:]) for x in patch_features]
+         patch_features = self.forward_modules["preprocessing"](patch_features)
+         patch_features = self.forward_modules["preadapt_aggregator"](patch_features)
+ 
+         return patch_features, patch_shapes
+ 
+     def trainer(self, training_data, val_data, name):
+         """
+         Training loop for the GLASS model.
+ 
+         Args:
+             training_data (DataLoader): DataLoader for the training dataset.
+             val_data (DataLoader): DataLoader for the validation dataset.
+             name (str): Name identifier for the training run.
+         """
+         self.train()
+         self.discriminator.train()
+ 
+         # Initialize optimizers
+         optimizer = optim.AdamW(self.forward_modules.parameters(), lr=self.lr)
+         optimizer_d = optim.AdamW(self.discriminator.parameters(), lr=self.lr * 2)
+ 
+         # The Discriminator ends in a Sigmoid, so its outputs are probabilities;
+         # plain BCELoss is the matching criterion (BCEWithLogitsLoss would apply
+         # a second sigmoid).
+         criterion_d = nn.BCELoss()
+ 
+         # Initialize separate AMP scalers
+         scaler_main = GradScaler()
+         scaler_dsc = GradScaler()
+ 
+         # Use the TensorBoard wrapper set up in set_model_dir, or a default one
+         if self.logger is not None:
+             tb_writer = self.logger
+         else:
+             tb_writer = TBWrapper(log_dir=None)  # SummaryWriter's default run directory
+ 
+         best_auroc = 0.0
+         best_model_path = os.path.join(self.model_dir, f"best_model_{name}.pth")
+ 
+         for epoch in range(1, self.meta_epochs + 1):
+             LOGGER.info(f"Epoch [{epoch}/{self.meta_epochs}]")
+             epoch_loss = 0.0
+             epoch_loss_d = 0.0
+             for batch_idx, batch in enumerate(training_data):
+                 images = batch['image'].to(self.device).float()      # [B, 3, H, W]
+                 aug_images = batch['aug'].to(self.device).float()    # [B, 3, H, W]
+                 masks_s = batch['mask_s'].to(self.device).float()    # [B, H, W]
+                 masks_gt = batch['mask_gt'].to(self.device).float()  # [B, 1, H, W]
+ 
+                 optimizer.zero_grad()
+                 optimizer_d.zero_grad()
+ 
+                 # ----- Train Main Model -----
+                 with autocast():
+                     # Forward pass
+                     embeddings, _ = self._embed(images)          # [B*N_patches, D]
+                     aug_embeddings, _ = self._embed(aug_images)  # [B*N_patches, D]
+ 
+                     # Aggregate embeddings to [B, D] by averaging over patches
+                     B = images.size(0)
+                     N_patches = embeddings.size(0) // B
+                     assert embeddings.size(0) == B * N_patches, "Embeddings cannot be evenly divided into the batch size."
+                     embeddings = embeddings.view(B, N_patches, -1).mean(dim=1)          # [B, D]
+                     aug_embeddings = aug_embeddings.view(B, N_patches, -1).mean(dim=1)  # [B, D]
+ 
+                     # Debug tensor properties
+                     assert embeddings.requires_grad, "Embeddings do not require grad!"
+                     assert aug_embeddings.requires_grad, "Augmented embeddings do not require grad!"
+                     assert embeddings.shape[0] == images.size(0), "Aggregated embeddings batch size does not match input batch size."
+ 
+                     # Compute reconstruction or similarity loss
+                     loss = self.loss_fn(embeddings, aug_embeddings)
+ 
+                 # Backward pass with AMP for the main model (outside autocast)
+                 scaler_main.scale(loss).backward()
+                 scaler_main.step(optimizer)
+                 scaler_main.update()
+ 
+                 epoch_loss += loss.item()
+ 
+                 # ----- Train Discriminator -----
+                 with autocast():
+                     # Detach embeddings to prevent gradients flowing back to the main model
+                     embeddings_detached = embeddings.detach()
+                     aug_embeddings_detached = aug_embeddings.detach()
+ 
+                     # Discriminator forward pass
+                     outputs_real = self.discriminator(embeddings_detached)      # [B, 1]
+                     outputs_fake = self.discriminator(aug_embeddings_detached)  # [B, 1]
+ 
+                 # Create labels
+                 real_labels = torch.ones(outputs_real.size(0), 1).to(self.device)   # [B, 1]
+                 fake_labels = torch.zeros(outputs_fake.size(0), 1).to(self.device)  # [B, 1]
+ 
+                 # BCELoss is unsafe under autocast, so compute it in full precision
+                 loss_real = criterion_d(outputs_real.float(), real_labels)
+                 loss_fake = criterion_d(outputs_fake.float(), fake_labels)
+                 loss_d = loss_real + loss_fake
+ 
+                 # Backward pass with AMP for the discriminator
+                 scaler_dsc.scale(loss_d).backward()
+                 scaler_dsc.step(optimizer_d)
+                 scaler_dsc.update()
+ 
+                 epoch_loss_d += loss_d.item()
+ 
+                 if batch_idx % 100 == 0:
+                     LOGGER.info(f"Batch [{batch_idx}/{len(training_data)}] "
+                                 f"Loss: {loss.item():.4f} Loss_D: {loss_d.item():.4f}")
+ 
+             avg_epoch_loss = epoch_loss / len(training_data)
+             avg_epoch_loss_d = epoch_loss_d / len(training_data)
+ 
+             LOGGER.info(f"Epoch [{epoch}/{self.meta_epochs}] "
+                         f"Average Loss: {avg_epoch_loss:.4f} "
+                         f"Average Loss_D: {avg_epoch_loss_d:.4f}")
+ 
+             # Log to TensorBoard
+             tb_writer.log("Train/Loss", avg_epoch_loss, epoch)
+             tb_writer.log("Train/Loss_D", avg_epoch_loss_d, epoch)
+ 
+             # Validation
+             if epoch % self.eval_epochs == 0:
+                 auroc = self.tester(val_data, name)
+                 LOGGER.info(f"Validation AUROC after Epoch [{epoch}]: {auroc:.4f}")
+                 tb_writer.log("Validation/AUROC", auroc, epoch)
+ 
+                 # Save the best model
+                 if auroc > best_auroc:
+                     best_auroc = auroc
+                     torch.save(self.state_dict(), best_model_path)  # Save only the state_dict
+                     LOGGER.info(f"Best model saved at Epoch [{epoch}] with AUROC: {auroc:.4f}")
+ 
+         LOGGER.info(f"Training completed. Best AUROC: {best_auroc:.4f}")
+         tb_writer.close()
+ 
+     def tester(self, test_data, name):
+         """
+         Evaluation loop for the GLASS model.
+ 
+         Args:
+             test_data (DataLoader): DataLoader for the test dataset.
+             name (str): Name identifier for the evaluation run.
+ 
+         Returns:
+             float: AUROC score on the test dataset.
+         """
+         self.eval()
+         self.discriminator.eval()
+         all_scores = []
+         all_labels = []
+ 
+         with torch.no_grad():
+             for batch_idx, batch in enumerate(test_data):
+                 images = batch['image'].to(self.device).float()      # [B, 3, H, W]
+                 masks_gt = batch['mask_gt'].to(self.device).float()  # [B, 1, H, W]
+                 labels = batch['is_anomaly'].cpu().numpy()           # [B]
+ 
+                 # Forward pass
+                 embeddings, _ = self._embed(images, evaluation=True)  # [B*N_patches, D]
+                 B = images.size(0)
+                 N_patches = embeddings.size(0) // B
+                 embeddings = embeddings.view(B, N_patches, -1).mean(dim=1)  # [B, D]
+                 anomaly_scores = self.discriminator(embeddings).cpu().numpy().flatten()  # [B]
+ 
+                 all_scores.extend(anomaly_scores.tolist())
+                 all_labels.extend(labels.tolist())
+ 
+         # Compute AUROC
+         from sklearn.metrics import roc_auc_score
+         auroc = roc_auc_score(all_labels, all_scores)
+         return auroc
+ 
+     def _evaluate(self, images, scores, segmentations, labels_gt, masks_gt, name, path='training'):
+         # Implementation of evaluation metrics
+         pass
+ 
+     def predict(self, test_dataloader):
+         """This function provides anomaly scores/maps for full dataloaders."""
+         # Implementation of prediction logic
+         pass
+ 
+     def _predict(self, img):
+         """Infer score and mask for a batch of images."""
+         # Implementation of individual prediction logic
+         pass
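Both trainer and tester collapse the per-patch embeddings returned by _embed into one vector per image with the same view/mean pattern. A standalone shape check with random tensors (289 patches corresponds to the 17x17 grid produced by 518x518 inputs):

import torch

B, n_patches, dim = 2, 289, 1024         # 289 = 17 x 17 patches per image
flat = torch.randn(B * n_patches, dim)   # _embed output: one row per patch
per_image = flat.view(B, n_patches, -1).mean(dim=1)
print(per_image.shape)                   # torch.Size([2, 1024])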
models/model.py ADDED
@@ -0,0 +1,101 @@
+ # models/model.py
+ 
+ import torch
+ 
+ 
+ def init_weight(m):
+     if isinstance(m, torch.nn.Linear):
+         torch.nn.init.xavier_normal_(m.weight)
+     if isinstance(m, torch.nn.BatchNorm2d):
+         m.weight.data.normal_(1.0, 0.02)
+         m.bias.data.fill_(0)
+     elif isinstance(m, torch.nn.Conv2d):
+         m.weight.data.normal_(0.0, 0.02)
+ 
+ 
+ class Discriminator(torch.nn.Module):
+     def __init__(self, in_planes, n_layers=2, hidden=None):
+         super(Discriminator, self).__init__()
+ 
+         _hidden = in_planes if hidden is None else hidden
+         self.body = torch.nn.Sequential()
+         for i in range(n_layers - 1):
+             _in = in_planes if i == 0 else _hidden
+             _hidden = int(_hidden // 1.5) if hidden is None else hidden
+             self.body.add_module('block%d' % (i + 1),
+                                  torch.nn.Sequential(
+                                      torch.nn.Linear(_in, _hidden),
+                                      torch.nn.BatchNorm1d(_hidden),
+                                      torch.nn.LeakyReLU(0.2)
+                                  ))
+         self.tail = torch.nn.Sequential(
+             torch.nn.Linear(_hidden, 1, bias=False),
+             torch.nn.Sigmoid()  # outputs are probabilities, not logits
+         )
+         self.apply(init_weight)
+ 
+     def forward(self, x):
+         x = self.body(x)
+         x = self.tail(x)
+         return x
+ 
+ 
+ class Projection(torch.nn.Module):
+     def __init__(self, in_planes, out_planes=None, n_layers=1, layer_type=0):
+         super(Projection, self).__init__()
+ 
+         if out_planes is None:
+             out_planes = in_planes
+         self.layers = torch.nn.Sequential()
+         _in = None
+         _out = None
+         for i in range(n_layers):
+             _in = in_planes if i == 0 else _out
+             _out = out_planes
+             self.layers.add_module(f"{i}fc", torch.nn.Linear(_in, _out))
+             if i < n_layers - 1:
+                 if layer_type > 1:
+                     self.layers.add_module(f"{i}relu", torch.nn.LeakyReLU(.2))
+         self.apply(init_weight)
+ 
+     def forward(self, x):
+         x = self.layers(x)
+         return x
+ 
+ 
+ class PatchMaker:
+     def __init__(self, patchsize, top_k=0, stride=None):
+         self.patchsize = patchsize
+         self.stride = stride
+         self.top_k = top_k
+ 
+     def patchify(self, features, return_spatial_info=False):
+         """Convert a tensor into a tensor of respective patches.
+ 
+         Args:
+             features: [torch.Tensor, bs x c x w x h]
+         Returns:
+             patches: [torch.Tensor, bs * w//stride * h//stride, c, patchsize, patchsize]
+         """
+         padding = int((self.patchsize - 1) / 2)
+         unfolder = torch.nn.Unfold(kernel_size=self.patchsize, stride=self.stride, padding=padding, dilation=1)
+         unfolded_features = unfolder(features)
+         number_of_total_patches = []
+         for s in features.shape[-2:]:
+             n_patches = (s + 2 * padding - 1 * (self.patchsize - 1) - 1) / self.stride + 1
+             number_of_total_patches.append(int(n_patches))
+         unfolded_features = unfolded_features.reshape(
+             *features.shape[:2], self.patchsize, self.patchsize, -1
+         )
+         unfolded_features = unfolded_features.permute(0, 4, 1, 2, 3)
+ 
+         if return_spatial_info:
+             return unfolded_features, number_of_total_patches
+         return unfolded_features
+ 
+     def unpatch_scores(self, x, batchsize):
+         return x.reshape(batchsize, -1, *x.shape[1:])
+ 
+     def score(self, x):
+         x = x[:, :, 0]
+         x = torch.max(x, dim=1).values
+         return x
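PatchMaker.patchify is a thin wrapper around torch.nn.Unfold, and the loop over features.shape[-2:] applies the standard convolution output-size formula. A standalone check with the values used at inference time (patchsize=3, stride=1, a 17x17 feature grid):

import torch

patchsize, stride = 3, 1
padding = (patchsize - 1) // 2
features = torch.randn(1, 2048, 17, 17)  # e.g. a layer4 map for a 518x518 input
unfolder = torch.nn.Unfold(kernel_size=patchsize, stride=stride, padding=padding)
unfolded = unfolder(features)            # [1, 2048 * 3 * 3, n_patches]
n = (17 + 2 * padding - (patchsize - 1) - 1) // stride + 1
print(unfolded.shape, n * n)             # torch.Size([1, 18432, 289]) 289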
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ torch==2.4.1
+ torchvision==0.19.1  # torchvision release paired with torch 2.4.1
+ numpy==1.23.5
+ Pillow==9.4.0
+ tqdm==4.65.0
+ scikit-image==0.20.0
+ scikit-learn==1.2.2
+ scipy==1.11.4
run.sh ADDED
@@ -0,0 +1,2 @@
+ #!/bin/sh
+ python3 main.py
runner.py ADDED
@@ -0,0 +1,37 @@
+ # -----------------------------------------------------------------------------
+ # This Python script is the primary entry point called by our judge. It runs
+ # your code to generate anomaly scores, then evaluates those scores to produce
+ # the final results.
+ # -----------------------------------------------------------------------------
+ 
+ import subprocess
+ 
+ # Step 1: Generate anomaly scores
+ subprocess.run(["./run.sh"], check=True)
+ 
+ # Step 2: Evaluate the generated scores
+ subprocess.run(
+     [
+         "python3",
+         "evaluation/eval_main.py",
+         "--device",
+         "0",
+         "--data_path",
+         "./data/",
+         "--dataset_name",
+         "rayan_dataset",
+         "--class_name",
+         "all",
+         "--output_dir",
+         "./output",
+         "--output_scores_dir",
+         "./output_scores",
+         "--save_csv",
+         "True",
+         "--save_json",
+         "True",
+         "--class_name_mapping_dir",
+         "./evaluation/class_name_mapping.json",
+     ],
+     check=True,
+ )
utils/dump_scores.py ADDED
@@ -0,0 +1,34 @@
+ # utils/dump_scores.py
+ 
+ import os
+ import json
+ from pathlib import Path
+ 
+ 
+ class DumpScores:
+     def __init__(self, output_dir):
+         self.output_dir = output_dir
+         os.makedirs(self.output_dir, exist_ok=True)
+ 
+     def save_scores(self, image_paths, img_level_scores, pix_level_scores):
+         for img_path, img_score, pix_score in zip(image_paths, img_level_scores, pix_level_scores):
+             # Determine the relative path to maintain directory structure
+             relative_path = os.path.relpath(img_path, "./data")
+             relative_dir = os.path.dirname(relative_path)
+             output_dir = os.path.join(self.output_dir, relative_dir)
+             os.makedirs(output_dir, exist_ok=True)
+ 
+             # Get the image filename without extension
+             img_name = Path(img_path).stem
+ 
+             # Create the JSON structure
+             score_data = {
+                 "img_level_score": img_score,
+                 "pix_level_score": pix_score.tolist()  # Convert numpy array to list for JSON serialization
+             }
+ 
+             # Define the output JSON file path
+             json_path = os.path.join(output_dir, f"{img_name}_scores.json")
+ 
+             # Save the JSON file
+             with open(json_path, "w") as f:
+                 json.dump(score_data, f, indent=4)
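A minimal usage sketch for DumpScores; the image path below is hypothetical (only its relative layout matters, the file itself is never opened), and the import assumes the utils package is on the Python path:

import numpy as np
from utils.dump_scores import DumpScores

dump = DumpScores(output_dir="./output_scores")
dump.save_scores(
    image_paths=["./data/pill/test/broken/000.png"],  # hypothetical sample
    img_level_scores=[0.87],
    pix_level_scores=[np.zeros((1, 224, 224), dtype=np.float32)],
)
# Writes ./output_scores/pill/test/broken/000_scores.json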
utils/feature_extractor.py ADDED
@@ -0,0 +1,18 @@
+ # utils/feature_extractor.py
+ 
+ import torch
+ import torch.nn as nn
+ from torchvision import models
+ 
+ 
+ class FeatureExtractor(nn.Module):
+     def __init__(self, backbone='resnet50'):
+         super(FeatureExtractor, self).__init__()
+         if backbone == 'resnet50':
+             # weights= replaces the deprecated pretrained=True argument
+             self.model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
+             # Drop the final average-pool and fully connected layers
+             self.features = nn.Sequential(*list(self.model.children())[:-2])
+         else:
+             raise NotImplementedError(f"Backbone {backbone} is not implemented.")
+ 
+     def forward(self, x):
+         return self.features(x)
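And a quick shape check for the class above (the first call downloads the ImageNet weights; keeping everything up to layer4 leaves a 2048-channel feature map):

import torch

extractor = FeatureExtractor().eval()
with torch.no_grad():
    feats = extractor(torch.randn(1, 3, 224, 224))
print(feats.shape)  # torch.Size([1, 2048, 7, 7])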