ExtendedRealityLab commited on
Commit
ae29340
·
verified ·
1 Parent(s): e33b6e5

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ resnet/resnet18/checkpoints/model/model.ckpt-5865.meta filter=lfs diff=lfs merge=lfs -text
37
+ resnet/resnet18/checkpoints/model/model.ckpt-5865.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (thundernet_upload)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/thundernet_upload.iml" filepath="$PROJECT_DIR$/.idea/thundernet_upload.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/thundernet_upload.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/workspace.xml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="2b1e7e9a-7e79-45bd-bade-eff89bafbc84" name="Changes" comment="" />
5
+ <option name="SHOW_DIALOG" value="false" />
6
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
7
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
8
+ <option name="LAST_RESOLUTION" value="IGNORE" />
9
+ </component>
10
+ <component name="MarkdownSettingsMigration">
11
+ <option name="stateVersion" value="1" />
12
+ </component>
13
+ <component name="ProjectId" id="3AZCQv5vVF8siEEdrRR9I1Xw2Ty" />
14
+ <component name="ProjectViewState">
15
+ <option name="hideEmptyMiddlePackages" value="true" />
16
+ <option name="showLibraryContents" value="true" />
17
+ </component>
18
+ <component name="PropertiesComponent"><![CDATA[{
19
+ "keyToString": {
20
+ "RunOnceActivity.OpenProjectViewOnStart": "true",
21
+ "RunOnceActivity.ShowReadmeOnStart": "true"
22
+ }
23
+ }]]></component>
24
+ <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
25
+ <component name="TaskManager">
26
+ <task active="true" id="Default" summary="Default task">
27
+ <changelist id="2b1e7e9a-7e79-45bd-bade-eff89bafbc84" name="Changes" comment="" />
28
+ <created>1772790692792</created>
29
+ <option name="number" value="Default" />
30
+ <option name="presentableId" value="Default" />
31
+ <updated>1772790692792</updated>
32
+ </task>
33
+ <servers />
34
+ </component>
35
+ </project>
README.md CHANGED
@@ -1,3 +1,59 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Thundernet
2
+ Thundernet is a semantic segmentation model that processes RGB input using convolutional networks to extract key features.
3
+
4
+ ### USE OF THE REPOSITORY - TRAINING
5
+
6
+ To execute the train file: python train_config.py
7
+
8
+ You can change the parameters adding them as flags in the previous command or changing them directly in the Thundernet_config.py file
9
+ The parameters that you can include (that are relevant for training) are:
10
+ - **train_path (str)**: path to the train data (default: "C:/Users/user/Documents/pruned_training/training/")
11
+ - **val_path (str)**: path to the val data (default: "C:/Users/user/Documents/pruned_training/val/")
12
+ - **model_dir (str)**: path to save the trained model (default: "C:/Users/user/Documents/Thundernet/pruebas_modelos/" )
13
+ - **model_weights (str)**: NOT APPLICABLE FOR TRAINING path to the trained weights (default: "C:/Users/user/Documents/Thundernet/pruebas_modelos/32_ppm/BS4_lossBCE_weights_lr_0.00013713842558297858_reg-1.1743577101671763e-05-ep-13-val_loss0.11463435739278793-train_loss0.053004469722509384-val_iou0.8959722518920898-train_iou0.9606077075004578.hdf5")
14
+ - **batch_size (int)**: batch size (default: 4)
15
+ - **loss (str)**: type of loss to use (default: "BCE")
16
+ - **classes (int)**: number of classes (default: 2). The original purpose of the code was to segment egocentric bodies, so two classes were involved (body and background)
17
+ - **pretrained (bool)**: start from a pretrained model (default: False)
18
+ - **pretrained_weights (str)**: path to the pretrained model (default: None)
19
+ - **lr (float)**: learning rate (default: 1e-4)
20
+ - **epochs (int)**: number of epochs (default: 15)
21
+ - **resolution (str)**: resolution of input images (defualt: 640x480)
22
+ - **kernel_regularizer (float)**: kernel regularizer (default: 2e-4)
23
+
24
+ Note: there are more parameters in the file, but they are no use for training
25
+
26
+
27
+ ### USE OF THE REPOSITORY - EVALUATION
28
+
29
+ To execute the evaluation, you can execute: python inference_config.py
30
+
31
+ You can change the parameters adding them as flags in the previous command or changing them directly in the Thundernet_config2.py file
32
+ (that are relevant for evaluation) are:
33
+ - **model_weights (str)**: NOT APPLICABLE FOR TRAINING path to the trained weights (default: "C:/Users/user/Documents/Thundernet/pruebas_modelos/32_ppm/BS4_lossBCE_weights_lr_0.00013713842558297858_reg-1.1743577101671763e-05-ep-13-val_loss0.11463435739278793-train_loss0.053004469722509384-val_iou0.8959722518920898-train_iou0.9606077075004578.hdf5")
34
+ - **batch_size (int)**: batch size (default: 4)
35
+ - **resolution (str)**: resolution of input images (defualt: 640x480)
36
+
37
+ Note: there are more parameters in the file, but they are no use for evaluation
38
+
39
+ In the inference_config.py file you can execute the main with a "show=True" to display some predictions. However, you will have to close the image after every prediction.
40
+
41
+
42
+ ### DATA PREPARATION
43
+ The data must be stored in a path with the following structure:
44
+
45
+ - data
46
+ + images (folder with RGB images in a .jpg format)
47
+ -- example1.jpg
48
+ + labels (folder with label images in a .png format)
49
+ -- example1.png
50
+
51
+ The name for the files should be the same for the image and it's corresponding label.
52
+
53
+
54
+ ### OPTIMIZE HYPERPARAMETERS
55
+ To find the best hyperparameters for training, you can execute the train_optuna.py file. This file will find the optimal values for batch_size, learning_rate and kernel_regularizer. The rest of the hyperparameters values will be obtained from the "thundernet_config.py" file.
56
+
57
+
58
+ #### Compare models
59
+ To compare if two models are the same (the weights are equal), the file "compare_models.py" can be executed. The path to the model's weights should be changed in the "weights_path1" and "weights_path2" variables.
data_gen.py ADDED
@@ -0,0 +1,632 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import numpy as np
4
+ import cv2
5
+ import tensorflow as tf
6
+ import tensorflow.keras as keras
7
+ import tensorflow.compat.v1 as tf1
8
+
9
+ from torch.utils.data import Dataset
10
+ import random
11
+
12
+ import torch.nn.functional as F
13
+
14
+
15
+ class ImageHelper:
16
+
17
+ COLOR_TRANSFORMATIONS = [
18
+ "saturation",
19
+ "contrast",
20
+ "brightness",
21
+ ]
22
+
23
+ def __init__(self, img_path, label_path, output_size, **kwargs):
24
+ self.img_path = img_path
25
+ self.label_path = label_path
26
+ self.output_size = output_size
27
+ self.kwargs = kwargs
28
+
29
+ # Stereo
30
+ self.to_stereo = False
31
+ if "to_stereo" in kwargs.keys() and kwargs["to_stereo"]:
32
+ self.to_stereo = True
33
+
34
+ # Flip
35
+ self.flip = False
36
+ if "flip" in kwargs.keys() and kwargs["flip"]:
37
+ self.flip = True
38
+
39
+ # Color transformations
40
+ self.color_transformations = []
41
+ for k, v in self.kwargs.items():
42
+ if k in self.COLOR_TRANSFORMATIONS and v:
43
+ self.color_transformations.append(k)
44
+
45
+ def get(self):
46
+
47
+ # Load
48
+ img = cv2.imread(str(self.img_path))
49
+ label = cv2.imread(str(self.label_path))
50
+
51
+ # Size checking
52
+ assert img.shape == label.shape
53
+
54
+ # Flip
55
+ if self.flip:
56
+ img, label = self.apply_transformation("flip", img, label)
57
+
58
+ # Color transformations
59
+ for color_tr in self.color_transformations:
60
+ img, label = self.apply_transformation(color_tr, img, label)
61
+
62
+ # Numpy3333
63
+ if type(img) != np.ndarray:
64
+ img = np.array(img)
65
+ if type(label) != np.ndarray:
66
+ label = np.array(label)
67
+
68
+ # To stereo
69
+ if self.to_stereo:
70
+ img = np.concatenate((img, img), axis=1)
71
+ label = np.concatenate((label, label), axis=1)
72
+
73
+ # Size
74
+ img = cv2.resize(img, self.output_size[::-1])
75
+ label = cv2.resize(
76
+ label, self.output_size[::-1], interpolation=cv2.INTER_NEAREST
77
+ )
78
+
79
+ label = label[:, :, 0]
80
+ return img, label
81
+
82
+ @classmethod
83
+ def apply_transformation(cls, transformation, img, label):
84
+ if transformation == "flip":
85
+ return cls.tensor_to_numpy(
86
+ tf.image.flip_left_right(img)
87
+ ), cls.tensor_to_numpy(tf.image.flip_left_right(label))
88
+ elif transformation == "saturation":
89
+ return cls.tensor_to_numpy(tf.image.random_saturation(img, 0.5, 1.5)), label
90
+ elif transformation == "contrast":
91
+ return cls.tensor_to_numpy(tf.image.random_contrast(img, 0.5, 1.5)), label
92
+ elif transformation == "brightness":
93
+ return cls.tensor_to_numpy(tf.image.random_brightness(img, 0.3)), label
94
+ elif transformation == "rotate":
95
+ raise ValueError("This transformation is not supported yet")
96
+ elif transformation == "directed_crop":
97
+ raise ValueError("This transformation is not supported")
98
+
99
+ @staticmethod
100
+ def tensor_to_numpy(tensor):
101
+ if tf.executing_eagerly():
102
+ a = tensor.numpy()
103
+ else:
104
+ raise NotImplementedError(
105
+ "Please adapt the Data Generator to work when not executing eagerly"
106
+ )
107
+ return a
108
+
109
+
110
+ class DataGenerator(keras.utils.Sequence):
111
+
112
+ def __init__(
113
+ self,
114
+ images_path,
115
+ labels_path,
116
+ n_classes,
117
+ batch_size=32,
118
+ output_size=(480, 640),
119
+ to_stereo=False,
120
+ flip=False,
121
+ saturation=False,
122
+ contrast=False,
123
+ brightness=False,
124
+ class_mappings=None,
125
+ ):
126
+
127
+ self.images_path = Path(images_path)
128
+ self.labels_path = Path(labels_path)
129
+ self.n_classes = n_classes
130
+ self.batch_size = batch_size
131
+ self.output_size = output_size
132
+ self.to_stereo = to_stereo
133
+ self.class_mappings = class_mappings
134
+
135
+ # Check image and labels dir
136
+ img_paths = sorted(list(self.images_path.iterdir()))
137
+
138
+ def has_label(img_filename):
139
+ return (self.labels_path / f"{img_filename.stem}.png").exists()
140
+
141
+ if not all(map(has_label, img_paths)):
142
+ raise FileNotFoundError("Check every image has a label")
143
+
144
+ # Obtain transformations
145
+ transformations = []
146
+ if flip:
147
+ transformations.append("flip")
148
+ if saturation:
149
+ transformations.append("saturation")
150
+ if contrast:
151
+ transformations.append("contrast")
152
+ if brightness:
153
+ transformations.append("brightness")
154
+
155
+ # Prepare augmentation
156
+ elements = []
157
+ for image_path in img_paths:
158
+ label_path = self.labels_path / f"{image_path.stem}.png"
159
+ elements.append(
160
+ ImageHelper(
161
+ image_path,
162
+ label_path,
163
+ self.output_size,
164
+ to_stereo=self.to_stereo,
165
+ )
166
+ )
167
+ for tr in transformations:
168
+ elements.append(
169
+ ImageHelper(
170
+ image_path,
171
+ label_path,
172
+ self.output_size,
173
+ to_stereo=self.to_stereo,
174
+ **{tr: True},
175
+ )
176
+ )
177
+
178
+ self.elements = elements
179
+
180
+ # Shuffle
181
+ np.random.shuffle(self.elements)
182
+
183
+ def __getitem__(self, idx):
184
+ batch_elements = self.elements[
185
+ idx * self.batch_size : (idx + 1) * self.batch_size
186
+ ]
187
+ batch_elements_tuple = list(map(lambda x: x.get(), batch_elements))
188
+ X, y = zip(*batch_elements_tuple)
189
+ X, y = np.array(X), np.array(y)
190
+ y_onehot = np.zeros(y.shape + (self.n_classes,))
191
+ for i in np.unique(y):
192
+ i = int(i)
193
+ idx_for_this_class = np.where(y == i)
194
+ if self.class_mappings:
195
+ y_onehot[
196
+ idx_for_this_class
197
+ + (
198
+ np.ones(len(idx_for_this_class[0]), dtype=int)
199
+ * self.class_mappings[i],
200
+ )
201
+ ] = 1
202
+ else:
203
+ y_onehot[
204
+ idx_for_this_class
205
+ + (np.ones(len(idx_for_this_class[0]), dtype=int) * i,)
206
+ ] = 1
207
+ final_X, final_y = X.astype(np.float64) / 255, y_onehot.astype(bool)
208
+ # assert final_X.shape[:-1] == final_y.shape[:-1]
209
+ return final_X, final_y
210
+
211
+ def get_item_name(self, idx):
212
+ return self.elements[idx].img_path.stem
213
+
214
+ def __len__(self):
215
+
216
+ try:
217
+ return np.int(len(self.elements) / self.batch_size)
218
+ except AttributeError:
219
+ return int(len(self.elements) / self.batch_size)
220
+
221
+ def on_epoch_end(self):
222
+ np.random.shuffle(self.elements)
223
+
224
+ @classmethod
225
+ def create_generators(
226
+ cls,
227
+ dataset_dir,
228
+ n_classes,
229
+ training_batch_size=32,
230
+ validation_batch_size=8,
231
+ output_size=(480, 640),
232
+ to_stereo=False,
233
+ transformations=tuple(),
234
+ class_mappings=None,
235
+ ):
236
+ """
237
+ Utility method to create both generators
238
+ Args:
239
+ dataset_dir: path of the dataset, must have training and val dirs
240
+ training_batch_size: batch size of the training generator
241
+ output_size: shape of the generated images
242
+ transformations: for data augmentation
243
+ to_stereo: whether the image and label must be converted to stereo
244
+ class_mappings: dict containing a mapping for each class
245
+
246
+ Returns: a tuple with the training and val generators
247
+
248
+ """
249
+ dataset_dir = Path(dataset_dir)
250
+ training_generator = cls(
251
+ dataset_dir / "training" / "images",
252
+ dataset_dir / "training" / "labels",
253
+ n_classes,
254
+ batch_size=training_batch_size,
255
+ output_size=output_size,
256
+ to_stereo=to_stereo,
257
+ **{tr: True for tr in transformations},
258
+ class_mappings=class_mappings,
259
+ )
260
+ validation_generator = cls(
261
+ dataset_dir / "val" / "images",
262
+ dataset_dir / "val" / "labels",
263
+ n_classes,
264
+ batch_size=validation_batch_size,
265
+ output_size=output_size,
266
+ to_stereo=to_stereo,
267
+ **{tr: True for tr in transformations},
268
+ class_mappings=class_mappings,
269
+ )
270
+
271
+ return training_generator, validation_generator
272
+
273
+
274
+ y_k_size = 6
275
+ x_k_size = 6
276
+
277
+
278
+ class BaseDataset(Dataset):
279
+ def __init__(
280
+ self,
281
+ ignore_label=255,
282
+ base_size=2048,
283
+ crop_size=(512, 1024),
284
+ scale_factor=16,
285
+ mean=[0.485, 0.456, 0.406],
286
+ std=[0.229, 0.224, 0.225],
287
+ ):
288
+
289
+ self.base_size = base_size
290
+ self.crop_size = crop_size
291
+ self.ignore_label = ignore_label
292
+
293
+ self.mean = mean
294
+ self.std = std
295
+ self.scale_factor = scale_factor
296
+
297
+ self.files = []
298
+
299
+ def __len__(self):
300
+ return len(self.files)
301
+
302
+ def input_transform(self, image, city=True):
303
+ if city:
304
+ image = image.astype(np.float32)[:, :, ::-1]
305
+ else:
306
+ image = image.astype(np.float32)
307
+ image = image / 255.0
308
+ image -= self.mean
309
+ image /= self.std
310
+ return image
311
+
312
+ def label_transform(self, label):
313
+ return np.array(label).astype(np.uint8)
314
+
315
+ def pad_image(self, image, h, w, size, padvalue):
316
+ pad_image = image.copy()
317
+ pad_h = max(size[0] - h, 0)
318
+ pad_w = max(size[1] - w, 0)
319
+ if pad_h > 0 or pad_w > 0:
320
+ pad_image = cv2.copyMakeBorder(
321
+ image, 0, pad_h, 0, pad_w, cv2.BORDER_CONSTANT, value=padvalue
322
+ )
323
+
324
+ return pad_image
325
+
326
+ def rand_crop(self, image, label, edge):
327
+ h, w = image.shape[:-1]
328
+ image = self.pad_image(image, h, w, self.crop_size, (0.0, 0.0, 0.0))
329
+ label = self.pad_image(label, h, w, self.crop_size, (self.ignore_label,))
330
+ edge = self.pad_image(edge, h, w, self.crop_size, (0.0,))
331
+
332
+ new_h, new_w = label.shape
333
+ x = random.randint(0, new_w - self.crop_size[1])
334
+ y = random.randint(0, new_h - self.crop_size[0])
335
+ image = image[y : y + self.crop_size[0], x : x + self.crop_size[1]]
336
+ label = label[y : y + self.crop_size[0], x : x + self.crop_size[1]]
337
+ edge = edge[y : y + self.crop_size[0], x : x + self.crop_size[1]]
338
+
339
+ return image, label, edge
340
+
341
def multi_scale_aug(self, image, label=None, edge=None, rand_scale=1, rand_crop=True):
    """Rescale so the long side equals ``base_size * rand_scale``, then optionally crop.

    Aspect ratio is preserved; the image is resized bilinearly, while the
    label and edge maps use nearest-neighbour interpolation so class ids
    are not blended.

    Args:
        image: HxWxC image array.
        label: optional HxW label map; when None, only the resized image
            is returned.
        edge: optional HxW edge map, resized only when a label is given.
        rand_scale: multiplier applied to ``self.base_size``.
        rand_crop: when True, take a random ``self.crop_size`` crop of the
            resized triple via ``self.rand_crop``.

    Returns:
        The resized image alone (label is None), or an
        ``(image, label, edge)`` tuple.

    Bug fix: ``np.int`` was removed in NumPy 1.24; plain ``int`` is the
    drop-in replacement (``+ 0.5`` keeps the round-half-up behaviour).
    """
    long_size = int(self.base_size * rand_scale + 0.5)
    h, w = image.shape[:2]
    if h > w:
        new_h = long_size
        new_w = int(w * long_size / h + 0.5)
    else:
        new_w = long_size
        new_h = int(h * long_size / w + 0.5)

    image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    if label is not None:
        label = cv2.resize(label, (new_w, new_h), interpolation=cv2.INTER_NEAREST)
        if edge is not None:
            edge = cv2.resize(edge, (new_w, new_h), interpolation=cv2.INTER_NEAREST)
    else:
        return image

    if rand_crop:
        image, label, edge = self.rand_crop(image, label, edge)

    return image, label, edge
366
+ def gen_sample(
367
+ self,
368
+ image,
369
+ label,
370
+ multi_scale=True,
371
+ is_flip=True,
372
+ edge_pad=True,
373
+ edge_size=4,
374
+ city=False,
375
+ ):
376
+
377
+ edge = cv2.Canny(label, 0.1, 0.2)
378
+ kernel = np.ones((edge_size, edge_size), np.uint8)
379
+ if edge_pad:
380
+ edge = edge[y_k_size:-y_k_size, x_k_size:-x_k_size]
381
+ edge = np.pad(
382
+ edge, ((y_k_size, y_k_size), (x_k_size, x_k_size)), mode="constant"
383
+ )
384
+ edge = (cv2.dilate(edge, kernel, iterations=1) > 50) * 1.0
385
+
386
+ if multi_scale:
387
+ rand_scale = 0.5 + random.randint(0, self.scale_factor) / 10.0
388
+ image, label, edge = self.multi_scale_aug(
389
+ image, label, edge, rand_scale=rand_scale
390
+ )
391
+
392
+ image = self.input_transform(image, city=city)
393
+ label = self.label_transform(label)
394
+
395
+ image = image.transpose((2, 0, 1))
396
+
397
+ if is_flip:
398
+ flip = np.random.choice(2) * 2 - 1
399
+ image = image[:, :, ::flip]
400
+ label = label[:, ::flip]
401
+ edge = edge[:, ::flip]
402
+
403
+ return image, label, edge
404
+
405
+ def inference(self, config, model, image):
406
+ size = image.size()
407
+ pred = model(image)
408
+
409
+ if config.MODEL.NUM_OUTPUTS > 1:
410
+ pred = pred[config.TEST.OUTPUT_INDEX]
411
+
412
+ pred = F.interpolate(
413
+ input=pred,
414
+ size=size[-2:],
415
+ mode="bilinear",
416
+ align_corners=config.MODEL.ALIGN_CORNERS,
417
+ )
418
+
419
+ return pred.exp()
420
+
421
+
422
+ class PIDNetDataset(BaseDataset):
423
+
424
+ def __init__(
425
+ self,
426
+ images_path,
427
+ labels_path,
428
+ n_classes,
429
+ output_size=(480, 640),
430
+ to_stereo=False,
431
+ flip=False,
432
+ saturation=False,
433
+ contrast=False,
434
+ brightness=False,
435
+ class_mappings=None,
436
+ multi_scale=True,
437
+ ignore_label=255,
438
+ base_size=2048,
439
+ crop_size=(512, 1024),
440
+ scale_factor=16,
441
+ # mean=[0.485, 0.456, 0.406],
442
+ # std=[0.229, 0.224, 0.225],
443
+ mean=[0.342, 0.374, 0.416],
444
+ std=[0.241, 0.239, 0.253],
445
+ bd_dilate_size=4,
446
+ ):
447
+
448
+ super(PIDNetDataset, self).__init__(
449
+ ignore_label, base_size, crop_size, scale_factor, mean, std
450
+ )
451
+
452
+ self.images_path = Path(images_path)
453
+ self.labels_path = Path(labels_path)
454
+ self.n_classes = n_classes
455
+ self.output_size = output_size
456
+ self.to_stereo = to_stereo
457
+ self.class_mappings = class_mappings
458
+
459
+ self.bd_dilate_size = bd_dilate_size
460
+ self.multi_scale = multi_scale
461
+ self.flip = flip
462
+
463
+ # Check image and labels dir
464
+ img_paths = sorted(list(self.images_path.iterdir()))
465
+
466
+ def has_label(img_filename):
467
+ return (self.labels_path / f"{img_filename.stem}.png").exists()
468
+
469
+ if not all(map(has_label, img_paths)):
470
+ raise FileNotFoundError("Check every image has a label")
471
+
472
+ # Obtain transformations
473
+ transformations = []
474
+ # if flip:
475
+ # transformations.append('flip')
476
+ if saturation:
477
+ transformations.append("saturation")
478
+ if contrast:
479
+ transformations.append("contrast")
480
+ if brightness:
481
+ transformations.append("brightness")
482
+
483
+ # Prepare augmentation
484
+ elements = []
485
+ for image_path in img_paths:
486
+ label_path = self.labels_path / f"{image_path.stem}.png"
487
+ elements.append(
488
+ ImageHelper(
489
+ image_path,
490
+ label_path,
491
+ self.output_size,
492
+ to_stereo=self.to_stereo,
493
+ )
494
+ )
495
+ for tr in transformations:
496
+ elements.append(
497
+ ImageHelper(
498
+ image_path,
499
+ label_path,
500
+ self.output_size,
501
+ to_stereo=self.to_stereo,
502
+ **{tr: True},
503
+ )
504
+ )
505
+ self.elements = elements
506
+
507
+ def __len__(self):
508
+ return len(self.elements)
509
+
510
+ def __getitem__(self, idx):
511
+
512
+ element = self.elements[idx]
513
+ name = element.img_path.stem
514
+
515
+ X, y = element.get()
516
+
517
+ # Class mappings
518
+ if self.class_mappings:
519
+ y = np.vectorize(lambda x: self.class_mappings[x])(y).astype(np.uint8)
520
+
521
+ y_onehot = np.zeros(y.shape + (self.n_classes,))
522
+ for i in np.unique(y):
523
+ i = int(i)
524
+ idx_for_this_class = np.where(y == i)
525
+ if self.class_mappings:
526
+ y_onehot[
527
+ idx_for_this_class
528
+ + (
529
+ np.ones(len(idx_for_this_class[0]), dtype=int)
530
+ * self.class_mappings[i],
531
+ )
532
+ ] = 1
533
+ else:
534
+ y_onehot[
535
+ idx_for_this_class
536
+ + (np.ones(len(idx_for_this_class[0]), dtype=int) * i,)
537
+ ] = 1
538
+
539
+ # assert final_X.shape[:-1] == final_y.shape[:-1]
540
+ image, label = X, y
541
+
542
+ image, label, edge = self.gen_sample(
543
+ image, label, self.multi_scale, self.flip, edge_size=self.bd_dilate_size
544
+ )
545
+
546
+ return image.copy(), label.copy(), edge.copy(), np.array(image.shape), name
547
+
548
+ @classmethod
549
+ def create_train_and_test_datasets(
550
+ cls,
551
+ dataset_dir,
552
+ n_classes,
553
+ output_size=(480, 640),
554
+ to_stereo=False,
555
+ transformations=tuple(),
556
+ class_mappings=None,
557
+ ):
558
+ dataset_dir = Path(dataset_dir)
559
+ training_generator = cls(
560
+ dataset_dir / "training" / "images",
561
+ dataset_dir / "training" / "labels",
562
+ n_classes,
563
+ output_size=output_size,
564
+ to_stereo=to_stereo,
565
+ **{tr: True for tr in transformations},
566
+ class_mappings=class_mappings,
567
+ )
568
+ validation_generator = cls(
569
+ dataset_dir / "val" / "images",
570
+ dataset_dir / "val" / "labels",
571
+ n_classes,
572
+ output_size=output_size,
573
+ to_stereo=to_stereo,
574
+ # **{tr: True for tr in transformations}
575
+ class_mappings=class_mappings,
576
+ )
577
+ return training_generator, validation_generator
578
+
579
+
580
+ class MergedDataset(Dataset):
581
+
582
+ def __init__(self, *datasets):
583
+ self.datasets = datasets
584
+ for d in self.datasets:
585
+ assert isinstance(d, Dataset)
586
+ self.lens = [len(d) for d in self.datasets]
587
+ self.acc_lens = [sum(self.lens[: i + 1]) for i in range(len(self.lens))]
588
+
589
+ def __len__(self):
590
+ return sum(self.lens)
591
+
592
+ def __getitem__(self, idx):
593
+ for i in range(len(self.acc_lens)):
594
+ if idx < self.acc_lens[i]:
595
+ diff = self.acc_lens[i - 1] if i != 0 else 0
596
+ s = self.datasets[i][idx - diff]
597
+ # assert s[1].max() <= 3
598
+ # assert s[1].max() <= 3
599
+ return s
600
+ raise ValueError(
601
+ f"idx out of range, was {idx}, should be less than {self.__len__()}"
602
+ )
603
+
604
+
605
+ if __name__ == "__main__":
606
+ """
607
+ dataset_dir = Path('/home/user/nas/Datasets/egocentric_segmentation/joint-ep-of-thu-ego-for-5-office-objects/')
608
+ helper = ImageHelper(
609
+ dataset_dir / 'training' / 'images' / 'L515_020_003_rgb_0246.jpg',
610
+ dataset_dir / 'training' / 'labels' / 'L515_020_003_rgb_0246.png',
611
+ (480, 640),
612
+ to_stereo=True
613
+ )
614
+ image, label = helper.get()
615
+ """
616
+ gen = DataGenerator(
617
+ Path(
618
+ "C:/Users/xruser/RealTimeSemanticSegmentation/joint-ep-of-thu-ego-stereo-1280x480/joint-ep-of-thu-ego-stereo-1280x480/"
619
+ )
620
+ / "pruned_training"
621
+ / "images",
622
+ Path(
623
+ "C:/Users/xruser/RealTimeSemanticSegmentation/joint-ep-of-thu-ego-stereo-1280x480/joint-ep-of-thu-ego-stereo-1280x480/"
624
+ )
625
+ / "pruned_training"
626
+ / "labels",
627
+ 7,
628
+ batch_size=4,
629
+ to_stereo=True,
630
+ )
631
+ images, labels = gen[0]
632
+ print("hola")
images_toolkit.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import tensorflow as tf
3
+ import sys
4
+ import os
5
+ import numpy as np
6
+ import cv2
7
+ from random import random
8
+ import scipy.misc
9
+ from glob import glob
10
+ import matplotlib.pyplot as plt
11
+ import math
12
+
13
+
14
def make_image_square(image):
    """Center-crop *image* to a square along its longer axis.

    The crop offset is printed, mirroring the original diagnostic output.

    Args:
        image (np.ndarray): image array; the taller-than-wide branch
            assumes a 3-dimensional (H, W, C) array.

    Returns:
        np.ndarray: view of *image* cropped to ``min(H, W)`` on the
        longer axis.
    """
    height, width = image.shape[0], image.shape[1]
    if height > width:
        # Taller than wide: trim rows symmetrically top and bottom.
        offset = int((height - width) / 2)
        print(offset)
        square = image[offset : offset + width, :, :]
    else:
        # Wider than tall (or already square): trim columns left/right.
        offset = int((width - height) / 2)
        print(offset)
        square = image[:, offset : offset + height]
    return square
29
+
30
+
31
+ def mix_with_background(path_background, frame, fg):
32
+
33
+ images = [img for img in os.listdir(path_background) if img.endswith(".jpg")]
34
+
35
+ # Assure that there is a binary image
36
+ # fg[np.where(fg > 150)] = 255
37
+ # fg[np.where(fg < 150)] = 0
38
+
39
+ # fg[np.where(fg == 1)] = 255
40
+ # fg[np.where(fg != 1)] = 0
41
+
42
+ upper_limit = 0
43
+ lower_limit = len(images) - 1
44
+ num = np.uint8(random() * (upper_limit - lower_limit) + lower_limit)
45
+ # print(images[num])
46
+ bkg = cv2.imread(os.path.join(path_background, images[num]))
47
+ # bkg = cv2.cvtColor(bkg, cv2.COLOR_BGR2RGB)
48
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
49
+
50
+ [index_r, index_c] = np.where((fg[:, :] >= 200))
51
+ # [index_r, index_c] = np.where((fg[:,:] == 1))
52
+
53
+ for i in range(1, len(index_r)):
54
+
55
+ bkg[index_r[i], index_c[i], 0] = frame[index_r[i], index_c[i], 0]
56
+ bkg[index_r[i], index_c[i], 1] = frame[index_r[i], index_c[i], 1]
57
+ bkg[index_r[i], index_c[i], 2] = frame[index_r[i], index_c[i], 2]
58
+
59
+ return bkg
60
+
61
+
62
def equalize(image):
    """Apply a mild CLAHE equalization to a BGR image.

    The image is converted to YUV, CLAHE is applied to the luma (Y)
    channel only so colors are preserved, and the result is converted
    back to BGR.

    Args:
        image (np.ndarray): BGR image of shape (H, W, 3), dtype uint8.

    Returns:
        np.ndarray: equalized BGR image, same shape and dtype.

    Fix: removed a dead ``output = np.zeros((H, W))`` pre-allocation —
    it had the wrong shape/dtype and was unconditionally overwritten.
    """
    img_yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
    # clipLimit=1.0 with a single 1x1 tile achieves a very slight,
    # near-global equalization (the original configuration's intent).
    clahe = cv2.createCLAHE(clipLimit=1.0, tileGridSize=(1, 1))
    img_yuv[:, :, 0] = clahe.apply(img_yuv[:, :, 0])
    return cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)
73
+
74
+
75
def show_image(image, text=None):
    """Display *image* with matplotlib, optionally titled with *text*.

    Bug fix: the title must be set before ``plt.show()`` — the original
    set it afterwards, which targets a fresh empty figure and is never
    displayed on the shown image.
    """
    plt.imshow(image)
    if text is not None:
        plt.title(text)
    plt.show()
80
+
81
+
82
+ def show_two_images(image, image2, text=None, cmap=None, horizontal=True):
83
+
84
+ plt.figure(1)
85
+
86
+ if horizontal:
87
+ plt.subplot(121)
88
+ else:
89
+ plt.subplot(211)
90
+
91
+ if cmap:
92
+ plt.imshow(image, cmap=cmap)
93
+ else:
94
+ plt.imshow(cv2.cvtColor(image.astype("float32"), cv2.COLOR_BGR2RGB))
95
+
96
+ if text is not None:
97
+ plt.title(text)
98
+
99
+ if horizontal:
100
+ plt.subplot(122)
101
+ else:
102
+ plt.subplot(212)
103
+
104
+ if cmap:
105
+ plt.imshow(image2, cmap=cmap)
106
+ else:
107
+ plt.imshow(cv2.cvtColor(image2.astype("float32"), cv2.COLOR_BGR2RGB))
108
+ plt.show()
109
+
110
+
111
+ def show_three_images(image, image2, image3, text=None):
112
+
113
+ plt.figure(1)
114
+ plt.subplot(311)
115
+ plt.imshow(image)
116
+
117
+ if text is not None:
118
+ plt.title(text)
119
+
120
+ plt.subplot(312)
121
+
122
+ # plt.imshow(image2,cmap='gray', vmin=0, vmax=1)
123
+ plt.imshow(image2)
124
+ plt.subplot(313)
125
+
126
+ # plt.imshow(image3,cmap='gray', vmin=0, vmax=1)
127
+ plt.imshow(image3)
128
+ plt.show()
129
+
130
+
131
+ def show_four_images(image, image2, image3, image4, text=None):
132
+
133
+ plt.figure(1)
134
+ plt.subplot(221)
135
+ plt.imshow(image)
136
+
137
+ if text is not None:
138
+ plt.title(text)
139
+
140
+ plt.subplot(222)
141
+ plt.imshow(image2)
142
+ # plt.imshow(image2, cmap='gray', vmin=0, vmax=1)
143
+
144
+ plt.subplot(223)
145
+ plt.imshow(image3)
146
+ # plt.imshow(image3, cmap='gray', vmin=0, vmax=1)
147
+
148
+ plt.subplot(224)
149
+ plt.imshow(image4)
150
+ # plt.imshow(image4, cmap='gray', vmin=0, vmax=1)
151
+ plt.show()
152
+
153
+
154
+ def show_six_images(image, image2, image3, image4, image5, image6, text=None):
155
+
156
+ plt.figure(1)
157
+ plt.subplot(231)
158
+ plt.imshow(image)
159
+
160
+ if text is not None:
161
+ plt.title(text)
162
+
163
+ plt.subplot(232)
164
+
165
+ plt.imshow(image2, cmap="gray", vmin=0, vmax=1)
166
+
167
+ plt.subplot(233)
168
+
169
+ plt.imshow(image3, cmap="gray", vmin=0, vmax=1)
170
+
171
+ plt.subplot(234)
172
+
173
+ plt.imshow(image4, cmap="gray", vmin=0, vmax=1)
174
+
175
+ plt.subplot(235)
176
+
177
+ plt.imshow(image5, cmap="gray", vmin=0, vmax=1)
178
+
179
+ plt.subplot(236)
180
+
181
+ plt.imshow(image6, cmap="gray", vmin=0, vmax=1)
182
+
183
+ plt.show()
184
+
185
+
186
+ def show_image_per_channel(image, text):
187
+
188
+ plt.figure(1)
189
+ plt.subplot(311)
190
+ plt.imshow(image[:, :, 0])
191
+ plt.subplot(312)
192
+ plt.imshow(image[:, :, 1])
193
+ plt.subplot(313)
194
+ plt.imshow(image[:, :, 2])
195
+
196
+ plt.show()
197
+ if text is not None:
198
+ plt.title(text)
199
+
200
+
201
def pixels_to_labels(image):
    """Binarize a grayscale image into {0, 255} labels.

    Pixels strictly above 200 become 255; all others (including the
    exact value 200) stay 0, matching the original thresholding.

    Args:
        image (np.ndarray): 2-D grayscale image.

    Returns:
        np.ndarray: float array of shape (H, W) with values 0.0 or 255.0.

    Bug fix: the original built ``output`` but never returned it, so the
    function always yielded ``None``.
    """
    print(image.shape)
    output = np.zeros((image.shape[0], image.shape[1]))
    output[image > 200] = 255
    # Values <= 200 are already 0 from np.zeros; no second pass needed.
    return output
208
+
209
+
210
def jpg_to_png(path):
    """Convert every ``*.jpg`` file in *path* to a ``*.png`` alongside it.

    Args:
        path (str): directory containing the ``.jpg`` images.

    Bug fixes:
      - ``scipy.misc.imread``/``imsave`` were removed in SciPy 1.2;
        cv2 (already imported by this module) is used instead.
      - ``name.rfind("/")`` broke on Windows back-slash paths; portable
        stem extraction now uses ``os.path`` helpers.
    """
    image_paths = glob(os.path.join(path, "*.jpg"))
    for i, name in enumerate(image_paths):
        print(i)
        stem = os.path.splitext(os.path.basename(name))[0]
        image = cv2.imread(name)  # BGR in / BGR out, so no channel swap needed
        # Really important to convert to uint8 before writing
        cv2.imwrite(os.path.join(path, stem + ".png"), image.astype(np.uint8))
222
+
223
+
224
def jpeg_to_jpg(path):
    """Re-save every ``*.jpeg`` file in *path* with a ``*.jpg`` extension.

    Args:
        path (str): directory containing the ``.jpeg`` images.

    Bug fixes:
      - ``scipy.misc.imread``/``imsave`` were removed in SciPy 1.2;
        cv2 (already imported by this module) is used instead.
      - ``name.rfind("/")`` broke on Windows back-slash paths; portable
        stem extraction now uses ``os.path`` helpers.
    """
    image_paths = glob(os.path.join(path, "*.jpeg"))
    for i, name in enumerate(image_paths):
        print(i)
        stem = os.path.splitext(os.path.basename(name))[0]
        image = cv2.imread(name)  # BGR in / BGR out, so no channel swap needed
        cv2.imwrite(os.path.join(path, stem + ".jpg"), image)
236
+
237
+
238
def overlap_image_with_label(image: np.ndarray, mask: np.ndarray) -> np.ndarray:
    """
    Keep only the image pixels covered by the segmentation mask.

    Positions where ``mask`` is positive retain their RGB values;
    everything else is zeroed out. The leading batch axis of the
    input image is removed.

    Args:
        - image (numpy array): RGB image of shape (1, 480, 640, 3)
        - mask (numpy array): segmentation mask of shape (480, 640)
    Returns:
        - overlap (numpy array): overlapped image
    """
    keep = (mask > 0).astype(np.uint8)
    result = image.copy().squeeze(0)
    result[keep == 0] = 0
    return result
254
+
255
+
256
+ def show_x_images(images, titles=None, cmap=None, horizontal=False):
257
+ num_images = len(images)
258
+
259
+ if num_images == 1:
260
+ plt.imshow(images[0])
261
+ if titles:
262
+ plt.title(titles[0])
263
+ plt.show()
264
+ return
265
+
266
+ if horizontal:
267
+ cols = num_images
268
+ rows = 1
269
+ else:
270
+ cols = math.ceil(math.sqrt(num_images))
271
+ rows = math.ceil(num_images / cols)
272
+
273
+ plt.figure(figsize=(15, 5))
274
+ for i, image in enumerate(images):
275
+ plt.subplot(rows, cols, i + 1)
276
+ if cmap:
277
+ plt.imshow(image, cmap=cmap)
278
+ else:
279
+ plt.imshow(cv2.cvtColor(image.astype("float32"), cv2.COLOR_BGR2RGB))
280
+
281
+ if titles and i < len(titles):
282
+ plt.title(titles[i])
283
+
284
+ plt.show()
285
+ return
inference_config.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import sys
3
+ from data_gen import DataGenerator
4
+
5
+ from model.model import Thundernet as Thundernet_original
6
+ from model.model_ppm_factors import Thundernet as Thundernet_ppm
7
+
8
+ from collections import defaultdict
9
+
10
+ import thundernet_config as Thundernet_config
11
+ import numpy as np
12
+ import argparse
13
+ from glob import glob
14
+ from utils import resolution2framesize3cha, simple_iou_for_multiple_classes, image_test
15
+ import tqdm
16
+ from pathlib import Path
17
+ import matplotlib.pyplot as plt
18
+ from images_toolkit import show_two_images, overlap_image_with_label, show_x_images
19
+
20
# Example command: python inference_config.py --model_path C:/Users/user/Documents/Thundernet/pruebas_modelos/32_ppm/BS4_lossBCE_weights_lr_0.00013713842558297858_reg-1.1743577101671763e-05-ep-13-val_loss0.11463435739278793-train_loss0.053004469722509384-val_iou0.8959722518920898-train_iou0.9606077075004578.hdf5 --classes 2

# Optional path to a previously saved (np.save) per-frame durations array in
# milliseconds.  When truthy, `main` also reports inference time relative to
# this baseline.
baseline_duration = None

# Command-line interface; defaults come from thundernet_config.
parser = argparse.ArgumentParser()

parser.add_argument(
    "--model_path",
    type=str,
    default=Thundernet_config.model_weights,
    help="Base directory for the hdf5 model, they are usually stored is /home/user/nas/deep_experiments/",
)

parser.add_argument(
    "--classes", type=int, default=Thundernet_config.classes, help="Number of classes. "
)

parser.add_argument(
    "--resolution",
    type=str,
    default=Thundernet_config.resolution,
    help="Input Resolution",
)
43
+
44
+
45
def main(
    args: list,
    model: str = "original",
    class_mappings: dict = None,
    transformations: tuple = tuple(),
    show: bool = False,
) -> None:
    """
    Perform inference in a set of images. If show=True, each prediction
    will be shown in the screen.
    Args:
        - args (list): list of parsed arguments
        - model (str): type of model ("original" or "ppm"). Default: "original"
        - class_mappings (dict): class mapper. Default: None
        - transformations (tuple): list of transformations to execute in the data. Default: tuple()
        - show (bool): display the predictions. Default: False
    Returns:
        - None
    """

    FLAGS: argparse.Namespace = parser.parse_args(args)

    # Get the model class matching the requested architecture name.
    if model == "original":
        Thundernet = Thundernet_original
    elif model == "ppm":
        Thundernet = Thundernet_ppm
    else:
        raise ValueError(f"Unknown model: {model}")

    # Set class mapping: the number of output classes becomes the number of
    # distinct mapped values plus one (background).
    if class_mappings is not None:
        FLAGS.classes = len(set(class_mappings.values())) + 1

    # Get the shape and the classes
    input_shape = resolution2framesize3cha(FLAGS.resolution)
    classes = FLAGS.classes

    # Initialize the model with loaded weights.
    # NOTE(review): `model` (the architecture-name string) is reassigned below
    # to the Keras model object; the `if model == "ppm"` check in the except
    # branch still sees the string because the reassignment only happens after
    # a successful construction.
    try:
        thundernet = Thundernet(
            input_shape=input_shape, resnet_trainable=False, n_classes=classes
        )
        model = thundernet.model
    except ValueError:
        # Fallback: if construction fails (e.g. weight/shape mismatch), retry
        # with the *other* architecture variant.
        if model == "ppm":
            Thundernet = Thundernet_original
        else:
            Thundernet = Thundernet_ppm

        thundernet = Thundernet(
            input_shape=input_shape, resnet_trainable=False, n_classes=classes
        )
        model = thundernet.model

    thundernet.model.load_weights(FLAGS.model_path)

    # Create dataloader for data (only the validation split is used here).
    dataset_dir: Path = Path(Thundernet_config.train_path).parent
    validation_generator: DataGenerator
    _, validation_generator = DataGenerator.create_generators(
        dataset_dir,
        FLAGS.classes,
        training_batch_size=1,
        validation_batch_size=1,
        to_stereo=False,
        transformations=transformations,
        class_mappings=class_mappings,
    )
    # Initilize lists to save data
    iou_aux: list = []
    iou_global: list = []
    durations: list = []

    # Iterate through the generator to get the iou metrics
    for i in tqdm.tqdm(range(len(validation_generator))):

        X, y = validation_generator[i]
        start_t = time.perf_counter()
        pred = model.predict(X)  # Shape: [1, 480, 640, 2]
        duration = time.perf_counter() - start_t
        # Durations are stored in milliseconds.
        durations.append(1000 * duration)

        pred = pred[0, :, :, :]  # Shape [480, 640, 2]

        prediction = np.argmax(pred, axis=2)  # Shape [480, 640]

        # Scaled label only used for visualisation (argmax * 255).
        label = y[0].argmax(axis=-1) * 255

        if show:

            label_RGB = overlap_image_with_label(X, label)
            prediction_RGB = overlap_image_with_label(X, prediction)
            show_x_images(
                images=[label_RGB, prediction_RGB],
                titles=["Real", "Prediction"],
                horizontal=True,
            )

        iou_simple_iou = simple_iou_for_multiple_classes(
            y[0].argmax(axis=-1), prediction, classes
        )
        iou_global.append(iou_simple_iou)
        # NOTE(review): rebuilding this array on every iteration is O(n^2)
        # overall; only the final value (after the loop) is actually used.
        iou_aux = np.array(iou_global)

        # NOTE(review): `name_image` is computed but never used.
        name_image = validation_generator.get_item_name(i)

    # Per-class mean IoU, ignoring NaN entries (frames where the class is
    # absent).  NOTE(review): the loop variable `i` shadows the iteration
    # index above, and the `classes <= 3` break skips the extra column for
    # small class counts — confirm this matches the layout returned by
    # simple_iou_for_multiple_classes.
    for i in range(0, classes + 1):
        if classes <= 3 and i == classes:
            break
        values = iou_aux[:, i]
        values = values[~np.isnan(values)]
        print("IoU for class=", i, "is ", np.mean(values))

    durations = np.array(durations)

    print("")
    print("INFERENCE TIME")
    print(f" - Mean: {np.mean(durations)}")
    print(f" - Std: {np.std(durations)}")

    # Optional comparison against a previously saved durations array
    # (module-level `baseline_duration`); assumes equal frame counts.
    if baseline_duration:
        durations_baseline = np.load(Path(baseline_duration).open("rb"))
        diff_durations = durations - durations_baseline
        print("INFERENCE TIME WITH RESPECT TO BASELINE (ABSOLUTE)")
        print(f" - Mean: {np.mean(diff_durations)}")
        print(f" - Std: {np.std(diff_durations)}")
        increase_durations = (durations - durations_baseline) / durations_baseline
        print("INFERENCE TIME WITH RESPECT TO BASELINE (RELATIVE)")
        print(f" - Mean: {np.mean(increase_durations)}")
        print(f" - Std: {np.std(increase_durations)}")
+
177
+
178
if __name__ == "__main__":
    # Default entry point: PPM variant with a binary mapping
    # (original class 1 -> 1, everything else -> background 0).
    main(sys.argv[1:], model="ppm", class_mappings=defaultdict(int, {1: 1}))
    # main(sys.argv[1:], model="original", class_mappings=defaultdict(int, {1: 1}), show=False)
model/model.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from tensorflow.keras.layers import (
    Input,
    Lambda,
    Concatenate,
    Conv2D,
    Conv2DTranspose,
    MaxPooling2D,
    BatchNormalization,
    Activation,
    Add,
    AveragePooling2D,
    UpSampling2D,
    SeparableConv2D,
    SpatialDropout2D,
)
from tensorflow.keras.models import Model
# Fixed typo: `imorport` -> `import` (the typo was a SyntaxError that made
# this module impossible to import at all).
from tensorflow.keras.layers import ConvLSTM2D
from tensorflow.keras import callbacks
import tensorflow.keras.optimizers
from tensorflow.keras.regularizers import l2
from tensorflow.python import pywrap_tensorflow
import tensorflow as tf
24
+
25
class Thundernet:
    """ThunderNet-style semantic-segmentation network.

    ResNet-18-like encoder (stem + three residual stages), a pyramid pooling
    module (PPM) on the deepest features, and a lightweight transposed-
    convolution decoder with concatenation skip connections.

    The explicit layer names (``conv2d_N`` / ``batch_normalization_N``) are
    chosen to line up with the variable names in the pretrained resnet18 TF
    checkpoint read by ``load_resnet_weights`` — do not renumber them.
    """

    def __init__(
        self,
        input_shape=(512, 1024, 3),
        resnet_trainable=False,
        kernel_regularizer=0,
        n_classes=38,
    ):
        # Args:
        #   input_shape: (H, W, C) of the network input.
        #   resnet_trainable: whether backbone conv/BN layers are trainable.
        #   kernel_regularizer: L2 factor applied to every conv kernel.
        #   n_classes: number of output segmentation classes.
        self.input_shape = input_shape
        self.resnet_trainable = resnet_trainable
        self.n_classes = n_classes
        self.model = self.thundernet(input_shape, resnet_trainable, kernel_regularizer)
        # Backbone weights come from a local TF checkpoint (see below).
        self.load_resnet_weights()

    def resnet_layer(
        self,
        inp,
        downsample_first=True,
        filters=64,
        first=False,
        number=0,
        resnet_trainable=False,
        kernel_regularizer=0,
    ):
        """Build one ResNet-18 stage: two basic blocks (5 convs + 4 BNs).

        ``number`` is the 1-based stage index and drives the layer-name
        arithmetic so names match the pretrained checkpoint:
          convs -> conv2d_{1..5 + (number-1)*5}
          BNs   -> batch_normalization_{1..4 + (number-1)*4}

        Args:
            inp: input feature tensor.
            downsample_first: stride-2 first conv plus projection shortcut.
            filters: channel count for every conv in the stage.
            first: use a 1x1 projection shortcut even without downsampling
                (needed when the channel count changes at stage 1).
            number: stage index used for layer naming.
            resnet_trainable: whether the stage's layers are trainable.
            kernel_regularizer: L2 factor for conv kernels.

        Returns:
            Output tensor of the stage.
        """
        if downsample_first:
            conv_1 = Conv2D(
                filters,
                kernel_size=3,
                strides=2,
                padding="same",
                name="conv2d_" + str(2 + (number - 1) * 5),
                use_bias=False,
                trainable=resnet_trainable,
                kernel_regularizer=l2(kernel_regularizer),
            )(inp)
        else:
            conv_1 = Conv2D(
                filters,
                kernel_size=3,
                strides=1,
                padding="same",
                name="conv2d_" + str(2 + (number - 1) * 5),
                use_bias=False,
                trainable=resnet_trainable,
                kernel_regularizer=l2(kernel_regularizer),
            )(inp)
        bn_1 = BatchNormalization(
            axis=3,
            name="batch_normalization_" + str(1 + (number - 1) * 4),
            trainable=resnet_trainable,
        )(conv_1)
        relu_1 = Activation("relu")(bn_1)
        conv_2 = Conv2D(
            filters,
            kernel_size=3,
            strides=1,
            padding="same",
            name="conv2d_" + str(3 + (number - 1) * 5),
            use_bias=False,
            trainable=resnet_trainable,
            kernel_regularizer=l2(kernel_regularizer),
        )(relu_1)
        bn_2 = BatchNormalization(
            axis=3,
            name="batch_normalization_" + str(2 + (number - 1) * 4),
            trainable=resnet_trainable,
        )(conv_2)
        if downsample_first:
            # Projection shortcut (1x1, stride 2) to match the downsampled shape.
            shortcut_1 = Conv2D(
                filters,
                kernel_size=1,
                strides=2,
                padding="same",
                name="conv2d_" + str(1 + (number - 1) * 5),
                use_bias=False,
                trainable=resnet_trainable,
                kernel_regularizer=l2(kernel_regularizer),
            )(inp)
            # bn_short = BatchNormalization(axis = 3, name = 'batch_normalization_' + str(1+(number-1)*5))(shortcut_1)
            joint = Add()([shortcut_1, bn_2])
        elif first:
            # 1x1 projection (stride 1): channel count changes, resolution does not.
            shortcut_1 = Conv2D(
                filters,
                kernel_size=1,
                strides=1,
                padding="same",
                name="conv2d_" + str(1 + (number - 1) * 5),
                use_bias=False,
                trainable=resnet_trainable,
                kernel_regularizer=l2(kernel_regularizer),
            )(inp)
            # bn_short = BatchNormalization(axis=3, name = 'batch_normalization_' + str(1+(number-1)*5))(shortcut_1)
            joint = Add()([shortcut_1, bn_2])
        else:
            # Identity shortcut.
            joint = Add()([inp, bn_2])
        block_1 = Activation("relu")(joint)
        # Second basic block of the stage (identity shortcut).
        conv_3 = Conv2D(
            filters,
            kernel_size=3,
            strides=1,
            padding="same",
            name="conv2d_" + str(4 + (number - 1) * 5),
            use_bias=False,
            trainable=resnet_trainable,
            kernel_regularizer=l2(kernel_regularizer),
        )(block_1)
        bn_3 = BatchNormalization(
            axis=3,
            name="batch_normalization_" + str(3 + (number - 1) * 4),
            trainable=resnet_trainable,
        )(conv_3)
        relu_3 = Activation("relu")(bn_3)
        conv_4 = Conv2D(
            filters,
            kernel_size=3,
            strides=1,
            padding="same",
            name="conv2d_" + str(5 + (number - 1) * 5),
            use_bias=False,
            trainable=resnet_trainable,
            kernel_regularizer=l2(kernel_regularizer),
        )(relu_3)
        bn_4 = BatchNormalization(
            axis=3,
            name="batch_normalization_" + str(4 + (number - 1) * 4),
            trainable=resnet_trainable,
        )(conv_4)
        joint_2 = Add()([block_1, bn_4])
        out = Activation("relu")(joint_2)
        return out

    def pyramid_pooling_block(self, input_tensor, number=0, kernel_regularizer=0):
        """Pyramid pooling module over the deepest encoder features.

        Pools the input at several bin sizes, projects each pooled map with a
        1x1 conv, resizes back to the input's spatial size, concatenates all
        branches and fuses them with a final 1x1 conv + ReLU.

        Args:
            input_tensor: encoder feature map of shape (B, w, h, C).
            number: starting index for the ``conv2d_N`` layer names.
            kernel_regularizer: L2 factor for conv kernels.

        Returns:
            Fused 256-channel feature map at the input's spatial size.
        """
        concat_list = []

        w = input_tensor.shape[1]
        h = input_tensor.shape[2]

        # Static shape is None when the input resolution is dynamic;
        # fall back to 45.  NOTE(review): 45 only matches one specific input
        # resolution — confirm against the resolutions actually used.
        if w == None:
            w = 45
        if h == None:
            h = 45

        k = 0
        # Redundant local import: tf is already imported at module level.
        import tensorflow as tf

        # Coarser bins get wider (512-channel) projections.
        for bin_size in [6, 12]:
            x = AveragePooling2D(
                pool_size=(w // bin_size, h // bin_size),
                strides=(w // bin_size, h // bin_size),
            )(input_tensor)
            x = Conv2D(
                512,
                kernel_size=1,
                strides=1,
                padding="same",
                name="conv2d_" + str(number + k),
                kernel_regularizer=l2(kernel_regularizer),
            )(x)
            x = Lambda(lambda x: tf.image.resize(x, (w, h)))(x)
            concat_list.append(x)
            k += 1

        # Finer bins get narrower (256-channel) projections.
        for bin_size in [18, 24]:
            x = AveragePooling2D(
                pool_size=(w // bin_size, h // bin_size),
                strides=(w // bin_size, h // bin_size),
            )(input_tensor)
            x = Conv2D(
                256,
                kernel_size=1,
                strides=1,
                padding="same",
                name="conv2d_" + str(number + k),
                kernel_regularizer=l2(kernel_regularizer),
            )(x)
            x = Lambda(lambda x: tf.image.resize(x, (w, h)))(x)
            concat_list.append(x)
            k += 1

        ppm = Concatenate()(concat_list)
        conv = Conv2D(
            256,
            kernel_size=1,
            name="conv2d_" + str(number + k),
            kernel_regularizer=l2(kernel_regularizer),
        )(ppm)
        out = Activation("relu")(conv)

        return out

    def decoder_block(self, inp, filters, number=0, kernel_regularizer=0):
        """Decoder block: 2x upsampling with a residual upsampling branch.

        Main path: 1x1 conv -> stride-2 transposed conv -> BN -> 1x1 conv
        (halving channels) -> BN.  Residual path: stride-2 transposed conv on
        the raw input -> BN.  Both are added and passed through ReLU.

        Args:
            inp: input feature map.
            filters: channel count of the intermediate conv; output has
                ``filters // 2`` channels.
            number: starting index for layer names.
            kernel_regularizer: L2 factor for conv kernels.

        Returns:
            Upsampled (2x) feature map with ``filters // 2`` channels.
        """
        # filters = inp.shape[3]
        conv_1 = Conv2D(
            filters,
            kernel_size=1,
            name="conv2d_" + str(number),
            kernel_regularizer=l2(kernel_regularizer),
        )(inp)
        # conv_1 = SeparableConv2D(filters, kernel_size=1, name='conv2d_' + str(number), kernel_regularizer=l2(kernel_regularizer))(inp)
        deconv = Conv2DTranspose(filters, kernel_size=3, strides=2, padding="same")(
            conv_1
        )
        bn_1 = BatchNormalization(axis=3, name="batch_normalization_" + str(number))(
            deconv
        )
        conv_2 = Conv2D(
            filters // 2,
            kernel_size=1,
            name="conv2d_" + str(number + 1),
            kernel_regularizer=l2(kernel_regularizer),
        )(bn_1)
        # conv_2 = SeparableConv2D(filters // 2, kernel_size=1, name='conv2d_' + str(number + 1), kernel_regularizer=l2(kernel_regularizer))(bn_1)
        bn_2 = BatchNormalization(
            axis=3, name="batch_normalization_" + str(number + 1)
        )(conv_2)

        # Residual branch: upsample the raw input directly.
        inp_deconv = Conv2DTranspose(
            filters // 2, kernel_size=3, strides=2, padding="same"
        )(inp)
        inp_bn = BatchNormalization(
            axis=3, name="batch_normalization_" + str(number + 2)
        )(inp_deconv)

        joint = Add()([inp_bn, bn_2])
        out = Activation("relu")(joint)
        return out

    def thundernet(
        self, input_shape=(512, 1024, 3), resnet_trainable=False, kernel_regularizer=0
    ):
        """Assemble the full network and return it as a Keras ``Model``.

        Encoder downsamples by 16 overall; two decoder blocks upsample by 4,
        and a final 4x bilinear upsample restores the input resolution before
        the per-pixel softmax classifier.
        """
        # This returns a tensor
        inputs = Input(shape=(input_shape))

        # a layer instance is callable on a tensor, and returns a tensor
        conv_1 = Conv2D(
            64,
            kernel_size=3,
            strides=2,
            padding="same",
            name="conv2d",
            use_bias=False,
            trainable=resnet_trainable,
            kernel_regularizer=l2(kernel_regularizer),
        )(inputs)
        bn_1 = BatchNormalization(
            axis=3, name="batch_normalization", trainable=resnet_trainable
        )(conv_1)
        relu_1 = Activation("relu")(bn_1)
        maxp_1 = MaxPooling2D(pool_size=(3, 3), strides=2, padding="same")(relu_1)

        res1 = self.resnet_layer(
            maxp_1,
            downsample_first=False,
            filters=64,
            first=True,
            number=1,
            resnet_trainable=resnet_trainable,
            kernel_regularizer=kernel_regularizer,
        )
        # res1 = SpatialDropout2D(0.25)(res1)
        res2 = self.resnet_layer(
            res1,
            downsample_first=True,
            filters=128,
            first=False,
            number=2,
            resnet_trainable=resnet_trainable,
            kernel_regularizer=kernel_regularizer,
        )
        # res2 = SpatialDropout2D(0.25)(res2)
        res3 = self.resnet_layer(
            res2,
            downsample_first=True,
            filters=256,
            first=False,
            number=3,
            resnet_trainable=resnet_trainable,
            kernel_regularizer=kernel_regularizer,
        )

        ppm = self.pyramid_pooling_block(
            res3, number=16, kernel_regularizer=kernel_regularizer
        )
        # ppm = Add()([ppm,res3])
        ppm = Concatenate()([ppm, res3])

        dec_1 = self.decoder_block(
            ppm, 256, number=21, kernel_regularizer=kernel_regularizer
        )
        # dec_1 = Add()([dec_1, res2])
        dec_1 = Concatenate()([dec_1, res2])

        dec_2 = self.decoder_block(
            dec_1, 128, number=24, kernel_regularizer=kernel_regularizer
        )
        # dec_2 = Add()([dec_2, res1])
        dec_2 = Concatenate()([dec_2, res1])

        # dec_3 = self.decoder_block(dec_2, 128, number=27)

        ups = UpSampling2D(size=(4, 4), interpolation="bilinear")(dec_2)
        # ups = UpSampling2D(size=(2, 2), interpolation='bilinear')(dec_3)

        out = Conv2D(
            filters=int(self.n_classes),
            kernel_size=1,
            activation="softmax",
            name="conv2d_out",
        )(ups)

        model = Model(inputs=inputs, outputs=out)
        return model

    def load_resnet_weights(self):
        """Copy conv kernels and BN statistics from the pretrained resnet18
        TF checkpoint into the matching Keras layers (matched by name)."""
        print("Loading weights for resnet18 backbone")
        checkpoint_path = "./resnet/resnet18/checkpoints/model/model.ckpt-5865"
        reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path)
        var_to_shape_map = reader.get_variable_to_shape_map()

        # for key in var_to_shape_map:
        # print("tensor_name: ", key)
        # print(reader.get_tensor(key).shape) # Remove this is you want to print only variable names

        # conv2d .. conv2d_15 belong to the backbone; later conv layers
        # (PPM, decoder, classifier) are trained from scratch.
        for k in range(0, 16):
            layer_name = "conv2d"
            if k != 0:
                layer_name += "_" + str(k)
            weights_key = layer_name + "/kernel"
            weights = reader.get_tensor(weights_key)
            keras_weights = self.model.get_layer(layer_name).get_weights()
            self.model.get_layer(layer_name).set_weights([weights])

            layer_name = "batch_normalization"
            if k != 0:
                layer_name += "_" + str(k)
            # Only batch_normalization .. _12 exist in the backbone (13 BNs).
            if k < 13:
                beta_key = layer_name + "/beta"
                beta = reader.get_tensor(beta_key)
                gamma_key = layer_name + "/gamma"
                gamma = reader.get_tensor(gamma_key)
                mean_key = layer_name + "/moving_mean"
                mean = reader.get_tensor(mean_key)
                var_key = layer_name + "/moving_variance"
                var = reader.get_tensor(var_key)
                keras_weights = self.model.get_layer(layer_name).get_weights()
                self.model.get_layer(layer_name).set_weights([gamma, beta, mean, var])
        print("Weights for resnet18 backbone loaded!")
model/model_ppm_factors.py ADDED
@@ -0,0 +1,438 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tensorflow.keras.layers import (
2
+ Input,
3
+ Lambda,
4
+ Concatenate,
5
+ Conv2D,
6
+ Conv2DTranspose,
7
+ MaxPooling2D,
8
+ BatchNormalization,
9
+ Activation,
10
+ Add,
11
+ AveragePooling2D,
12
+ UpSampling2D,
13
+ SeparableConv2D,
14
+ SpatialDropout2D,
15
+ )
16
+ from tensorflow.keras.models import Model
17
+ from keras import callbacks
18
+ import keras.optimizers
19
+ from tensorflow.keras.regularizers import l2
20
+ import tensorflow as tf
21
+ from tensorflow.python import pywrap_tensorflow
22
+
23
+
24
class Thundernet:
    """PPM variant of the ThunderNet segmentation network.

    Same ResNet-18-like encoder and transposed-convolution decoder as
    ``model.model.Thundernet``, but with a six-bin pyramid pooling module and
    optional extra upsampling controlled by ``add_2x1up_layer`` /
    ``add_2up_layer`` / ``resize_first``.

    Layer names (``conv2d_N`` / ``batch_normalization_N``) are chosen to line
    up with the variable names in the pretrained resnet18 TF checkpoint read
    by ``load_resnet_weights`` — do not renumber them.
    """

    def __init__(
        self,
        input_shape=(512, 1024, 3),
        resnet_trainable=False,
        kernel_regularizer=0,
        n_classes=2,
        add_2x1up_layer=False,
        add_2up_layer=False,
        resize_first=False,
    ):
        # Args:
        #   input_shape: (H, W, C) of the network input.
        #   resnet_trainable: whether backbone conv/BN layers are trainable.
        #   kernel_regularizer: L2 factor applied to every conv kernel.
        #   n_classes: number of output segmentation classes.
        #   add_2x1up_layer: append an extra (1, 2) bilinear upsample.
        #   add_2up_layer: split the final 4x upsample into two 2x upsamples.
        #   resize_first: halve the input resolution before the stem.
        self.input_shape = input_shape
        self.resnet_trainable = resnet_trainable
        self.n_classes = n_classes
        self.model = self.thundernet(
            input_shape,
            resnet_trainable,
            kernel_regularizer,
            add_2x1up_layer,
            add_2up_layer,
            resize_first,
        )
        self.load_resnet_weights()
        self.add_2x1up_layer = add_2x1up_layer
        self.add_2up_layer = add_2up_layer
        self.resize_first = resize_first

    def resnet_layer(
        self,
        inp,
        downsample_first=True,
        filters=64,
        first=False,
        number=0,
        resnet_trainable=False,
        kernel_regularizer=0,
    ):
        """Build one ResNet-18 stage: two basic blocks (5 convs + 4 BNs).

        ``number`` is the 1-based stage index and drives the layer-name
        arithmetic so names match the pretrained checkpoint:
          convs -> conv2d_{1..5 + (number-1)*5}
          BNs   -> batch_normalization_{1..4 + (number-1)*4}
        """
        if downsample_first:
            conv_1 = Conv2D(
                filters,
                kernel_size=3,
                strides=2,
                padding="same",
                name="conv2d_" + str(2 + (number - 1) * 5),
                use_bias=False,
                trainable=resnet_trainable,
                kernel_regularizer=l2(kernel_regularizer),
            )(inp)
        else:
            conv_1 = Conv2D(
                filters,
                kernel_size=3,
                strides=1,
                padding="same",
                name="conv2d_" + str(2 + (number - 1) * 5),
                use_bias=False,
                trainable=resnet_trainable,
                kernel_regularizer=l2(kernel_regularizer),
            )(inp)
        bn_1 = BatchNormalization(
            axis=3,
            name="batch_normalization_" + str(1 + (number - 1) * 4),
            trainable=resnet_trainable,
        )(conv_1)
        relu_1 = Activation("relu")(bn_1)
        conv_2 = Conv2D(
            filters,
            kernel_size=3,
            strides=1,
            padding="same",
            name="conv2d_" + str(3 + (number - 1) * 5),
            use_bias=False,
            trainable=resnet_trainable,
            kernel_regularizer=l2(kernel_regularizer),
        )(relu_1)
        bn_2 = BatchNormalization(
            axis=3,
            name="batch_normalization_" + str(2 + (number - 1) * 4),
            trainable=resnet_trainable,
        )(conv_2)
        if downsample_first:
            # Projection shortcut (1x1, stride 2) to match the downsampled shape.
            shortcut_1 = Conv2D(
                filters,
                kernel_size=1,
                strides=2,
                padding="same",
                name="conv2d_" + str(1 + (number - 1) * 5),
                use_bias=False,
                trainable=resnet_trainable,
                kernel_regularizer=l2(kernel_regularizer),
            )(inp)
            # bn_short = BatchNormalization(axis = 3, name = 'batch_normalization_' + str(1+(number-1)*5))(shortcut_1)
            joint = Add()([shortcut_1, bn_2])
        elif first:
            # 1x1 projection (stride 1): channel count changes, resolution does not.
            shortcut_1 = Conv2D(
                filters,
                kernel_size=1,
                strides=1,
                padding="same",
                name="conv2d_" + str(1 + (number - 1) * 5),
                use_bias=False,
                trainable=resnet_trainable,
                kernel_regularizer=l2(kernel_regularizer),
            )(inp)
            # bn_short = BatchNormalization(axis=3, name = 'batch_normalization_' + str(1+(number-1)*5))(shortcut_1)
            joint = Add()([shortcut_1, bn_2])
        else:
            # Identity shortcut.
            joint = Add()([inp, bn_2])
        block_1 = Activation("relu")(joint)
        # Second basic block of the stage (identity shortcut).
        conv_3 = Conv2D(
            filters,
            kernel_size=3,
            strides=1,
            padding="same",
            name="conv2d_" + str(4 + (number - 1) * 5),
            use_bias=False,
            trainable=resnet_trainable,
            kernel_regularizer=l2(kernel_regularizer),
        )(block_1)
        bn_3 = BatchNormalization(
            axis=3,
            name="batch_normalization_" + str(3 + (number - 1) * 4),
            trainable=resnet_trainable,
        )(conv_3)
        relu_3 = Activation("relu")(bn_3)
        conv_4 = Conv2D(
            filters,
            kernel_size=3,
            strides=1,
            padding="same",
            name="conv2d_" + str(5 + (number - 1) * 5),
            use_bias=False,
            trainable=resnet_trainable,
            kernel_regularizer=l2(kernel_regularizer),
        )(relu_3)
        bn_4 = BatchNormalization(
            axis=3,
            name="batch_normalization_" + str(4 + (number - 1) * 4),
            trainable=resnet_trainable,
        )(conv_4)
        joint_2 = Add()([block_1, bn_4])
        out = Activation("relu")(joint_2)
        return out

    def pyramid_pooling_block(self, input_tensor, number=0, kernel_regularizer=0):
        """Six-bin pyramid pooling module over the deepest encoder features.

        Pools at bin sizes 1/3/6 (512-channel projections) and 12/18/24
        (256-channel projections), resizes each branch back to the input's
        spatial size, concatenates and fuses with a final 1x1 conv + ReLU.
        """
        concat_list = []

        # w = input_tensor.shape[1].value
        # h = input_tensor.shape[2].value

        w = input_tensor.shape[1]
        h = input_tensor.shape[2]

        # Static shape is None when the input resolution is dynamic;
        # fall back to 45.  NOTE(review): 45 only matches one specific input
        # resolution — confirm against the resolutions actually used.
        if w == None:
            w = 45
        if h == None:
            h = 45

        k = 0
        for bin_size in [1, 3, 6]:
            x = AveragePooling2D(
                pool_size=(w // bin_size, h // bin_size),
                strides=(w // bin_size, h // bin_size),
            )(input_tensor)
            x = Conv2D(
                512,
                kernel_size=1,
                strides=1,
                padding="same",
                name="conv2d_" + str(number + k),
                kernel_regularizer=l2(kernel_regularizer),
            )(x)
            x = Lambda(lambda x: tf.image.resize(x, (w, h)))(x)
            concat_list.append(x)
            k += 1

        for bin_size in [12, 18, 24]:
            x = AveragePooling2D(
                pool_size=(w // bin_size, h // bin_size),
                strides=(w // bin_size, h // bin_size),
            )(input_tensor)
            x = Conv2D(
                256,
                kernel_size=1,
                strides=1,
                padding="same",
                name="conv2d_" + str(number + k),
                kernel_regularizer=l2(kernel_regularizer),
            )(x)
            x = Lambda(lambda x: tf.image.resize(x, (w, h)))(x)
            concat_list.append(x)
            k += 1

        ppm = Concatenate()(concat_list)
        conv = Conv2D(
            256,
            kernel_size=1,
            name="conv2d_" + str(number + k),
            kernel_regularizer=l2(kernel_regularizer),
        )(ppm)
        out = Activation("relu")(conv)

        return out

    def decoder_block(self, inp, filters, number=0, kernel_regularizer=0):
        """Decoder block: 2x upsampling with a residual upsampling branch.

        Main path: 1x1 conv -> stride-2 transposed conv -> BN -> 1x1 conv
        (halving channels) -> BN.  Residual path: stride-2 transposed conv on
        the raw input -> BN.  Both are added and passed through ReLU; the
        output has ``filters // 2`` channels.
        """
        # filters = inp.shape[3]
        conv_1 = Conv2D(
            filters,
            kernel_size=1,
            name="conv2d_" + str(number),
            kernel_regularizer=l2(kernel_regularizer),
        )(inp)
        # conv_1 = SeparableConv2D(filters, kernel_size=1, name='conv2d_' + str(number), kernel_regularizer=l2(kernel_regularizer))(inp)
        deconv = Conv2DTranspose(filters, kernel_size=3, strides=2, padding="same")(
            conv_1
        )
        bn_1 = BatchNormalization(axis=3, name="batch_normalization_" + str(number))(
            deconv
        )
        conv_2 = Conv2D(
            filters // 2,
            kernel_size=1,
            name="conv2d_" + str(number + 1),
            kernel_regularizer=l2(kernel_regularizer),
        )(bn_1)
        # conv_2 = SeparableConv2D(filters // 2, kernel_size=1, name='conv2d_' + str(number + 1), kernel_regularizer=l2(kernel_regularizer))(bn_1)
        bn_2 = BatchNormalization(
            axis=3, name="batch_normalization_" + str(number + 1)
        )(conv_2)

        # Residual branch: upsample the raw input directly.
        inp_deconv = Conv2DTranspose(
            filters // 2, kernel_size=3, strides=2, padding="same"
        )(inp)
        inp_bn = BatchNormalization(
            axis=3, name="batch_normalization_" + str(number + 2)
        )(inp_deconv)

        joint = Add()([inp_bn, bn_2])
        out = Activation("relu")(joint)
        return out

    def thundernet(
        self,
        input_shape=(512, 1024, 3),
        resnet_trainable=False,
        kernel_regularizer=0,
        add_2x1up_layer=False,
        add_2up_layer=False,
        resize_first=False,
    ):
        """Assemble the full network and return it as a Keras ``Model``."""

        # This returns a tensor
        inputs = Input(shape=(input_shape))

        if resize_first:
            # Optionally halve the spatial resolution before the stem.  The
            # Keras input tensor carries the batch dimension at index 0, so
            # height/width live at indices 1 and 2.  (Fix: the previous code
            # indexed shape[0]/shape[1] — batch/height — which crashed
            # because the batch dimension is None.)
            aux = Lambda(
                lambda x: tf.image.resize(
                    x, (inputs.shape[1] // 2, inputs.shape[2] // 2)
                )
            )(inputs)
        else:
            aux = inputs

        # a layer instance is callable on a tensor, and returns a tensor
        conv_1 = Conv2D(
            64,
            kernel_size=3,
            strides=2,
            padding="same",
            name="conv2d",
            use_bias=False,
            trainable=resnet_trainable,
            kernel_regularizer=l2(kernel_regularizer),
        )(aux)
        bn_1 = BatchNormalization(
            axis=3, name="batch_normalization", trainable=resnet_trainable
        )(conv_1)
        relu_1 = Activation("relu")(bn_1)
        maxp_1 = MaxPooling2D(pool_size=(3, 3), strides=2, padding="same")(relu_1)

        res1 = self.resnet_layer(
            maxp_1,
            downsample_first=False,
            filters=64,
            first=True,
            number=1,
            resnet_trainable=resnet_trainable,
            kernel_regularizer=kernel_regularizer,
        )
        # res1 = SpatialDropout2D(0.25)(res1)
        res2 = self.resnet_layer(
            res1,
            downsample_first=True,
            filters=128,
            first=False,
            number=2,
            resnet_trainable=resnet_trainable,
            kernel_regularizer=kernel_regularizer,
        )
        # res2 = SpatialDropout2D(0.25)(res2)
        res3 = self.resnet_layer(
            res2,
            downsample_first=True,
            filters=256,
            first=False,
            number=3,
            resnet_trainable=resnet_trainable,
            kernel_regularizer=kernel_regularizer,
        )

        ppm = self.pyramid_pooling_block(
            res3, number=16, kernel_regularizer=kernel_regularizer
        )
        # ppm = Add()([ppm,res3])
        ppm = Concatenate()([ppm, res3])
        # (Fix: removed a stray no-op `0` expression statement that was here.)

        dec_1 = self.decoder_block(
            ppm, 256, number=30, kernel_regularizer=kernel_regularizer
        )
        # dec_1 = Add()([dec_1, res2])
        dec_1 = Concatenate()([dec_1, res2])

        dec_2 = self.decoder_block(
            dec_1, 128, number=33, kernel_regularizer=kernel_regularizer
        )
        # dec_2 = Add()([dec_2, res1])
        dec_2 = Concatenate()([dec_2, res1])

        # dec_3 = self.decoder_block(dec_2, 128, number=27)

        # Final upsampling: a 4x bilinear upsample (optionally as two 2x
        # steps), optionally followed by an extra (1, 2) upsample.
        if add_2x1up_layer:
            if add_2up_layer:
                dec_3 = UpSampling2D(size=(2, 2), interpolation="bilinear")(dec_2)
                ups = UpSampling2D(size=(2, 2), interpolation="bilinear")(dec_3)
            else:
                ups = UpSampling2D(size=(4, 4), interpolation="bilinear")(dec_2)

            print("adding the new upsampling")
            ups_2 = UpSampling2D(size=(1, 2), interpolation="bilinear")(ups)
        else:
            if add_2up_layer:
                dec_3 = UpSampling2D(size=(2, 2), interpolation="bilinear")(dec_2)
                ups_2 = UpSampling2D(size=(2, 2), interpolation="bilinear")(dec_3)
            else:
                ups_2 = UpSampling2D(size=(4, 4), interpolation="bilinear")(dec_2)

        out = Conv2D(
            filters=int(self.n_classes),
            kernel_size=1,
            activation="softmax",
            name="conv2d_out",
        )(ups_2)

        model = Model(inputs=inputs, outputs=out)

        return model

    def load_resnet_weights(self):
        """Copy conv kernels and BN statistics from the pretrained resnet18
        TF checkpoint into the matching Keras layers (matched by name)."""

        print("Loading weights for resnet18 backbone")
        checkpoint_path = "./resnet/resnet18/checkpoints/model/model.ckpt-5865"
        # reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_path)
        reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path)  # for tf 2.0

        var_to_shape_map = reader.get_variable_to_shape_map()

        # for key in var_to_shape_map:
        # print("tensor_name: ", key)
        # print(reader.get_tensor(key).shape) # Remove this is you want to print only variable names

        # conv2d .. conv2d_15 belong to the backbone; later conv layers
        # (PPM, decoder, classifier) are trained from scratch.
        for k in range(0, 16):
            layer_name = "conv2d"
            if k != 0:
                layer_name += "_" + str(k)
            weights_key = layer_name + "/kernel"
            weights = reader.get_tensor(weights_key)
            keras_weights = self.model.get_layer(layer_name).get_weights()
            self.model.get_layer(layer_name).set_weights([weights])

            layer_name = "batch_normalization"
            if k != 0:
                layer_name += "_" + str(k)
            # Only batch_normalization .. _12 exist in the backbone (13 BNs).
            if k < 13:
                beta_key = layer_name + "/beta"
                beta = reader.get_tensor(beta_key)
                gamma_key = layer_name + "/gamma"
                gamma = reader.get_tensor(gamma_key)
                mean_key = layer_name + "/moving_mean"
                mean = reader.get_tensor(mean_key)
                var_key = layer_name + "/moving_variance"
                var = reader.get_tensor(var_key)
                keras_weights = self.model.get_layer(layer_name).get_weights()
                self.model.get_layer(layer_name).set_weights([gamma, beta, mean, var])
        print("Weights for resnet18 backbone loaded!")
+ print("Weights for resnet18 backbone loaded!")
profiler.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+
3
+ warnings.simplefilter("ignore", FutureWarning)
4
+ warnings.simplefilter("ignore", UserWarning)
5
+
6
+ import torch
7
+ from torch.profiler import profile, ProfilerActivity
8
+ from model.model import Thundernet
9
+ from models_repo.model_attention import Thundernet as Thundernet_attention
10
+ from models_repo.model_attention_2 import Thundernet as Thundernet_attention2
11
+ from models_repo.model_ppm_factors import Thundernet as Thundernet_ppm
12
+
13
+ import time
14
+ import cv2
15
+ import numpy as np
16
+ import tensorflow as tf
17
+
18
# NOTE(review): `device` is not referenced by the profiling code below —
# torch is only used to generate random input arrays. Kept for parity with
# other scripts; confirm before removing.
device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Define input shape: (height, width, channels) expected by the Thundernet models.
input_shape = (480, 640, 3)
21
+
22
+
23
def execute_profiler(model: Thundernet) -> None:
    """Profile a single forward pass of the given model.

    Runs one prediction on a random input under the torch profiler and
    prints a table of CPU/CUDA times and memory usage to the console.

    Args:
        - model: loaded model to profile
    Returns:
        - None
    """
    dummy_input = torch.randn(1, 480, 640, 3).cpu().numpy()

    profiler_kwargs = dict(
        activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
        record_shapes=True,
        profile_memory=True,
    )
    with profile(**profiler_kwargs) as prof, torch.no_grad():
        _ = model.predict(dummy_input)

    summary = prof.key_averages().table(sort_by="cuda_time_total", row_limit=20)
    print(summary)

    return
46
+
47
+
48
def measure_inference_time(model: Thundernet) -> None:
    """Measure the average inference time and FPS of a given Thundernet model.

    Performs 5 warm-up predictions, then times 20 predictions and prints the
    average latency (in ms) and the corresponding FPS to the console.

    Args:
        - model: loaded model to profile
    Returns:
        - None
    """
    image = torch.randn(1, 480, 640, 3).cpu().numpy()

    # Warm-up runs so one-off initialization cost is not measured.
    for _ in range(5):
        _ = model.predict(image)

    times = []
    for _ in range(20):
        # NOTE(review): presumably a TF synchronization barrier to flush
        # pending work before/after the timed call — confirm it is needed.
        tf.constant(0).numpy()
        # perf_counter() is monotonic and higher-resolution than time.time(),
        # which is the correct timer for latency measurements.
        start = time.perf_counter()
        _ = model.predict(image)
        tf.constant(0).numpy()
        times.append((time.perf_counter() - start) * 1000)

    avg_time = sum(times) / len(times)
    print(f"Average inference time: {avg_time:.2f} ms")

    fps = 1000 / avg_time
    print(f"FPS: {fps:.2f}")
    return
77
+
78
+
79
def main() -> None:
    """Load the trained Thundernet and run profiling plus latency measurement."""
    # Path to the best model so far (.hdf5).
    weights_path = "keras.hdf5"

    # Swap in Thundernet_attention / Thundernet_attention2 / Thundernet_ppm
    # here to profile one of the other model variants instead, depending on
    # where the checkpoint was trained.
    ThunderNet = Thundernet(
        input_shape=input_shape, resnet_trainable=False, n_classes=2
    )
    model = ThunderNet.model
    model.load_weights(weights_path)

    execute_profiler(model)
    measure_inference_time(model)
    return


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==0.15.0
2
+ asttokens==2.1.0
3
+ astunparse==1.6.3
4
+ backcall==0.2.0
5
+ cachetools==5.0.0
6
+ certifi==2021.10.8
7
+ charset-normalizer==2.0.12
8
+ clang==5.0
9
+ colorama==0.4.6
10
+ contourpy==1.0.5
11
+ cycler==0.11.0
12
+ decorator==5.1.1
13
+ efficientnet==1.0.0
14
+ executing==1.2.0
15
+ flatbuffers==1.12
16
+ fonttools==4.37.4
17
+ gast==0.4.0
18
+ google-auth==2.6.5
19
+ google-auth-oauthlib==0.4.6
20
+ google-pasta==0.2.0
21
+ grpcio==1.44.0
22
+ h5py==3.1.0
23
+ idna==3.3
24
+ image-classifiers==1.0.0
25
+ imageio==2.22.4
26
+ importlib-metadata==4.11.3
27
+ install==1.3.5
28
+ ipython==8.6.0
29
+ jedi==0.18.1
30
+ keras==2.6.0
31
+ Keras-Applications==1.0.8
32
+ Keras-Preprocessing==1.1.2
33
+ kiwisolver==1.4.4
34
+ Markdown==3.3.6
35
+ matplotlib==3.6.1
36
+ matplotlib-inline==0.1.6
37
+ mtcnn==0.1.1
38
+ networkx==2.8.8
39
+ numpy==1.19.5
40
+ nvidia-cublas-cu11==11.10.3.66
41
+ nvidia-cuda-nvrtc-cu11==11.7.99
42
+ nvidia-cuda-runtime-cu11==11.7.99
43
+ nvidia-cudnn-cu11==8.5.0.96
44
+ nvidia-pyindex==1.0.9
45
+ oauthlib==3.2.0
46
+ opencv-python==4.3.0.38
47
+ opt-einsum==3.3.0
48
+ packaging==21.3
49
+ pandas==1.4.0
50
+ parso==0.8.3
51
+ pexpect==4.8.0
52
+ pickleshare==0.7.5
53
+ Pillow==9.2.0
54
+ pkg_resources==0.0.0
55
+ prompt-toolkit==3.0.32
56
+ protobuf==3.20.0
57
+ ptyprocess==0.7.0
58
+ pure-eval==0.2.2
59
+ pyasn1==0.4.8
60
+ pyasn1-modules==0.2.8
61
+ Pygments==2.13.0
62
+ pyparsing==3.0.9
63
+ python-dateutil==2.8.2
64
+ pytz==2022.6
65
+ PyWavelets==1.4.1
66
+ PyYAML==6.0
67
+ pyzmq==22.3.0
68
+ requests==2.27.1
69
+ requests-oauthlib==1.3.1
70
+ rsa==4.8
71
+ scikit-image==0.19.3
72
+ scipy==1.8.0
73
+ segmentation-models==1.0.1
74
+ six==1.15.0
75
+ stack-data==0.6.1
76
+ tensorboard==2.8.0
77
+ tensorboard-data-server==0.6.1
78
+ tensorboard-plugin-wit==1.8.1
79
+ tensorflow==2.6.0
80
+ tensorflow-estimator==2.8.0
81
+ tensorflow-gpu==2.6.0
82
+ termcolor==1.1.0
83
+ tifffile==2022.10.10
84
+ torch==1.13.1
85
+ tqdm==4.64.1
86
+ traitlets==5.5.0
87
+ typing-extensions==3.7.4.3
88
+ urllib3==1.26.9
89
+ wcwidth==0.2.5
90
+ Werkzeug==2.1.1
91
+ wrapt==1.12.1
92
+ yacs==0.1.8
93
+ zipp==3.8.0
resnet/.gitignore ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # customer
2
+ docker.sh
3
+
4
+
5
+ # Created by https://www.gitignore.io/api/macos,python,pycharm,visualstudiocode
6
+
7
+ ### macOS ###
8
+ *.DS_Store
9
+ .AppleDouble
10
+ .LSOverride
11
+
12
+ # Icon must end with two \r
13
+ Icon
14
+
15
+ # Thumbnails
16
+ ._*
17
+
18
+ # Files that might appear in the root of a volume
19
+ .DocumentRevisions-V100
20
+ .fseventsd
21
+ .Spotlight-V100
22
+ .TemporaryItems
23
+ .Trashes
24
+ .VolumeIcon.icns
25
+ .com.apple.timemachine.donotpresent
26
+
27
+ # Directories potentially created on remote AFP share
28
+ .AppleDB
29
+ .AppleDesktop
30
+ Network Trash Folder
31
+ Temporary Items
32
+ .apdisk
33
+
34
+ ### PyCharm ###
35
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
36
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
37
+
38
+ # User-specific stuff:
39
+ .idea/**/workspace.xml
40
+ .idea/**/tasks.xml
41
+ .idea/dictionaries
42
+
43
+ # Sensitive or high-churn files:
44
+ .idea/**/dataSources/
45
+ .idea/**/dataSources.ids
46
+ .idea/**/dataSources.xml
47
+ .idea/**/dataSources.local.xml
48
+ .idea/**/sqlDataSources.xml
49
+ .idea/**/dynamic.xml
50
+ .idea/**/uiDesigner.xml
51
+
52
+ # Gradle:
53
+ .idea/**/gradle.xml
54
+ .idea/**/libraries
55
+
56
+ # CMake
57
+ cmake-build-debug/
58
+
59
+ # Mongo Explorer plugin:
60
+ .idea/**/mongoSettings.xml
61
+
62
+ ## File-based project format:
63
+ *.iws
64
+
65
+ ## Plugin-specific files:
66
+
67
+ # IntelliJ
68
+ /out/
69
+
70
+ # mpeltonen/sbt-idea plugin
71
+ .idea_modules/
72
+
73
+ # JIRA plugin
74
+ atlassian-ide-plugin.xml
75
+
76
+ # Cursive Clojure plugin
77
+ .idea/replstate.xml
78
+
79
+ # Ruby plugin and RubyMine
80
+ /.rakeTasks
81
+
82
+ # Crashlytics plugin (for Android Studio and IntelliJ)
83
+ com_crashlytics_export_strings.xml
84
+ crashlytics.properties
85
+ crashlytics-build.properties
86
+ fabric.properties
87
+
88
+ ### PyCharm Patch ###
89
+ # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
90
+
91
+ # *.iml
92
+ # modules.xml
93
+ # .idea/misc.xml
94
+ # *.ipr
95
+
96
+ # Sonarlint plugin
97
+ .idea/sonarlint
98
+
99
+ ### Python ###
100
+ # Byte-compiled / optimized / DLL files
101
+ __pycache__/
102
+ *.py[cod]
103
+ *$py.class
104
+
105
+ # C extensions
106
+ *.so
107
+
108
+ # Distribution / packaging
109
+ .Python
110
+ build/
111
+ develop-eggs/
112
+ dist/
113
+ downloads/
114
+ eggs/
115
+ .eggs/
116
+ lib/
117
+ lib64/
118
+ parts/
119
+ sdist/
120
+ var/
121
+ wheels/
122
+ *.egg-info/
123
+ .installed.cfg
124
+ *.egg
125
+
126
+ # PyInstaller
127
+ # Usually these files are written by a python script from a template
128
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
129
+ *.manifest
130
+ *.spec
131
+
132
+ # Installer logs
133
+ pip-log.txt
134
+ pip-delete-this-directory.txt
135
+
136
+ # Unit test / coverage reports
137
+ htmlcov/
138
+ .tox/
139
+ .coverage
140
+ .coverage.*
141
+ .cache
142
+ .pytest_cache/
143
+ nosetests.xml
144
+ coverage.xml
145
+ *.cover
146
+ .hypothesis/
147
+
148
+ # Translations
149
+ *.mo
150
+ *.pot
151
+
152
+ # Flask stuff:
153
+ instance/
154
+ .webassets-cache
155
+
156
+ # Scrapy stuff:
157
+ .scrapy
158
+
159
+ # Sphinx documentation
160
+ docs/_build/
161
+
162
+ # PyBuilder
163
+ target/
164
+
165
+ # Jupyter Notebook
166
+ .ipynb_checkpoints
167
+
168
+ # pyenv
169
+ .python-version
170
+
171
+ # celery beat schedule file
172
+ celerybeat-schedule.*
173
+
174
+ # SageMath parsed files
175
+ *.sage.py
176
+
177
+ # Environments
178
+ .env
179
+ .venv
180
+ env/
181
+ venv/
182
+ ENV/
183
+ env.bak/
184
+ venv.bak/
185
+
186
+ # Spyder project settings
187
+ .spyderproject
188
+ .spyproject
189
+
190
+ # Rope project settings
191
+ .ropeproject
192
+
193
+ # mkdocs documentation
194
+ /site
195
+
196
+ # mypy
197
+ .mypy_cache/
198
+
199
+ ### VisualStudioCode ###
200
+ .vscode/*
201
+ !.vscode/settings.json
202
+ !.vscode/tasks.json
203
+ !.vscode/launch.json
204
+ !.vscode/extensions.json
205
+ .history
206
+
207
+
208
+ # End of https://www.gitignore.io/api/macos,python,pycharm,visualstudiocode
resnet/.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
resnet/.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (deeplearning)" project-jdk-type="Python SDK" />
4
+ </project>
resnet/.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/resnet.iml" filepath="$PROJECT_DIR$/.idea/resnet.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
resnet/.idea/resnet.iml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ <component name="PyDocumentationSettings">
9
+ <option name="format" value="PLAIN" />
10
+ <option name="myDocStringFormat" value="Plain" />
11
+ </component>
12
+ </module>
resnet/apt.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ git
resnet/crowdai.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "challenge_id" : "nips-2018-avc-robust-model-track",
3
+ "grader_id": "nips-2018-avc-robust-model-track",
4
+ "authors" : ["bveliqi"],
5
+ "description" : "resnet-18 baseline model",
6
+ "gpu": true
7
+ }
resnet/fmodel.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import os
3
+ from foolbox.models import TensorFlowModel
4
+
5
+ from resnet18.resnet_model import Model
6
+
7
+
8
def create_model():
    """Build the ResNet-18 graph for 64x64 RGB inputs.

    Returns:
        A tuple ``(graph, saver, images, logits)``: the tf.Graph, a Saver
        created under the "utilities" scope, the input placeholder and the
        output logits tensor.
    """
    graph = tf.Graph()
    with graph.as_default():
        images = tf.placeholder(tf.float32, (None, 64, 64, 3))

        # Preprocessing: subtract the per-channel (R, G, B) means.
        channel_means = [123.68, 116.78, 103.94]
        features = images - tf.constant(channel_means)

        model = Model(
            resnet_size=18,
            bottleneck=False,
            num_classes=200,
            num_filters=64,
            kernel_size=3,
            conv_stride=1,
            first_pool_size=0,
            first_pool_stride=2,
            second_pool_size=7,
            second_pool_stride=1,
            block_sizes=[2, 2, 2, 2],
            block_strides=[1, 2, 2, 2],
            final_size=512,
            version=2,
            data_format=None,
        )

        # Inference mode (training=False).
        logits = model(features, False)

        with tf.variable_scope("utilities"):
            saver = tf.train.Saver()

    return graph, saver, images, logits
44
+
45
+
46
def create_fmodel():
    """Create a foolbox TensorFlowModel with the pretrained ResNet-18 weights.

    Restores the latest checkpoint from ``resnet18/checkpoints/model/`` into
    a fresh session and wraps the graph for foolbox with pixel bounds [0, 255].

    Returns:
        A foolbox ``TensorFlowModel`` ready for evaluation.
    """
    graph, saver, images, logits = create_model()
    sess = tf.Session(graph=graph)
    path = os.path.dirname(os.path.abspath(__file__))
    # BUG FIX: the checkpoint lives under "model", not "models_repo"
    # (see resnet/resnet18/checkpoints/model/checkpoint). With the wrong
    # directory tf.train.latest_checkpoint() returns None and restore fails.
    path = os.path.join(path, "resnet18", "checkpoints", "model")
    saver.restore(sess, tf.train.latest_checkpoint(path))

    with sess.as_default():
        fmodel = TensorFlowModel(images, logits, bounds=(0, 255))
    return fmodel


if __name__ == "__main__":
    # executable for debugging and testing
    print(create_fmodel())
resnet/main.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from fmodel import create_fmodel
2
+ from adversarial_vision_challenge import model_server
3
+
4
+
5
if __name__ == "__main__":
    # Build the foolbox-wrapped ResNet-18 and hand it to the adversarial
    # vision challenge evaluation server.
    fmodel = create_fmodel()
    model_server(fmodel)
resnet/requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ tensorflow-gpu==1.8.0
2
+ foolbox==1.1.0
3
+ git+https://github.com/bveliqi/adversarial-vision-challenge
resnet/resnet18/__init__.py ADDED
File without changes
resnet/resnet18/checkpoints/model/checkpoint ADDED
@@ -0,0 +1 @@
 
 
1
+ model_checkpoint_path: "model.ckpt-5865"
resnet/resnet18/checkpoints/model/graph.pbtxt ADDED
The diff for this file is too large to render. See raw diff
 
resnet/resnet18/checkpoints/model/model.ckpt-5865.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a567213201e4c8fda90ed9196633feb86a551ed76578456a90796f94b674b96
3
+ size 90221128
resnet/resnet18/checkpoints/model/model.ckpt-5865.index ADDED
Binary file (5.74 kB). View file
 
resnet/resnet18/checkpoints/model/model.ckpt-5865.meta ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6a76e71158e1b23d8993993d90ce435122e086def1a0c249e6a224655e73592
3
+ size 1161995
resnet/resnet18/resnet_model.py ADDED
@@ -0,0 +1,570 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Contains definitions for Residual Networks.
16
+
17
+ Residual networks ('v1' ResNets) were originally proposed in:
18
+ [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
19
+ Deep Residual Learning for Image Recognition. arXiv:1512.03385
20
+
21
+ The full preactivation 'v2' ResNet variant was introduced by:
22
+ [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
23
+ Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
24
+
25
+ The key difference of the full preactivation 'v2' variant compared to the
26
+ 'v1' variant in [1] is the use of batch normalization before every weight layer
27
+ rather than after.
28
+ """
29
+
30
+ from __future__ import absolute_import
31
+ from __future__ import division
32
+ from __future__ import print_function
33
+
34
+ import tensorflow as tf
35
+
36
+ _BATCH_NORM_DECAY = 0.997
37
+ _BATCH_NORM_EPSILON = 1e-5
38
+ DEFAULT_VERSION = 2
39
+
40
+
41
+ ################################################################################
42
+ # Convenience functions for building the ResNet model.
43
+ ################################################################################
44
def batch_norm(inputs, training, data_format):
    """Performs a batch normalization using a standard set of parameters.

    Args:
      inputs: The tensor to normalize.
      training: Whether the model is in training mode; controls updates of
        the moving statistics.
      data_format: The input format ('channels_last' or 'channels_first');
        selects which axis holds the channels.

    Returns:
      The batch-normalized tensor.
    """
    # We set fused=True for a significant performance boost. See
    # https://www.tensorflow.org/performance/performance_guide#common_fused_ops
    return tf.layers.batch_normalization(
        inputs=inputs,
        axis=1 if data_format == "channels_first" else 3,
        momentum=_BATCH_NORM_DECAY,
        epsilon=_BATCH_NORM_EPSILON,
        center=True,
        scale=True,
        training=training,
        fused=True,
    )
58
+
59
+
60
def fixed_padding(inputs, kernel_size, data_format):
    """Pads the input along the spatial dimensions independently of input size.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on data_format.
      kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
        Should be a positive integer.
      data_format: The input format ('channels_last' or 'channels_first').

    Returns:
      A tensor with the same format as the input with the data either intact
      (if kernel_size == 1) or padded (if kernel_size > 1).
    """
    # Split the total padding between the two sides; for an even total the
    # extra row/column goes on the end.
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg

    # Pad only the two spatial dimensions, whichever axes those are.
    if data_format == "channels_first":
        padded_inputs = tf.pad(
            inputs, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]]
        )
    else:
        padded_inputs = tf.pad(
            inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]
        )
    return padded_inputs
87
+
88
+
89
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
    """Strided 2-D convolution with explicit padding.

    Args:
      inputs: The input tensor.
      filters: Number of output filters.
      kernel_size: Convolution kernel size (positive integer).
      strides: Convolution stride; when greater than 1 the input is padded
        explicitly so the output size does not depend on input-size parity.
      data_format: The input format ('channels_last' or 'channels_first').

    Returns:
      The convolved tensor (no bias is added).
    """
    # The padding is consistent and is based only on `kernel_size`, not on the
    # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)

    return tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=("SAME" if strides == 1 else "VALID"),
        use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format,
    )
106
+
107
+
108
+ ################################################################################
109
+ # ResNet block definitions.
110
+ ################################################################################
111
def _building_block_v1(
    inputs, filters, training, projection_shortcut, strides, data_format
):
    """A single block for ResNet v1, without a bottleneck.

    Convolution then batch normalization then ReLU as described by:
      Deep Residual Learning for Image Recognition
      https://arxiv.org/pdf/1512.03385.pdf
      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on data_format.
      filters: The number of filters for the convolutions.
      training: A Boolean for whether the model is in training or inference
        mode. Needed for batch normalization.
      projection_shortcut: The function to use for projection shortcuts
        (typically a 1x1 convolution when downsampling the input).
      strides: The block's stride. If greater than 1, this block will ultimately
        downsample the input.
      data_format: The input format ('channels_last' or 'channels_first').

    Returns:
      The output tensor of the block; shape should match inputs.
    """
    shortcut = inputs

    # When downsampling/changing width, project the shortcut to match the
    # main path's shape; in v1 the projection is also batch-normalized.
    if projection_shortcut is not None:
        shortcut = projection_shortcut(inputs)
        shortcut = batch_norm(
            inputs=shortcut, training=training, data_format=data_format
        )

    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=filters,
        kernel_size=3,
        strides=strides,
        data_format=data_format,
    )
    inputs = batch_norm(inputs, training, data_format)
    inputs = tf.nn.relu(inputs)

    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=filters,
        kernel_size=3,
        strides=1,
        data_format=data_format,
    )
    inputs = batch_norm(inputs, training, data_format)
    inputs += shortcut
    inputs = tf.nn.relu(inputs)

    return inputs
166
+
167
+
168
def _building_block_v2(
    inputs, filters, training, projection_shortcut, strides, data_format
):
    """A single block for ResNet v2, without a bottleneck.

    Batch normalization then ReLu then convolution as described by:
      Identity Mappings in Deep Residual Networks
      https://arxiv.org/pdf/1603.05027.pdf
      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on data_format.
      filters: The number of filters for the convolutions.
      training: A Boolean for whether the model is in training or inference
        mode. Needed for batch normalization.
      projection_shortcut: The function to use for projection shortcuts
        (typically a 1x1 convolution when downsampling the input).
      strides: The block's stride. If greater than 1, this block will ultimately
        downsample the input.
      data_format: The input format ('channels_last' or 'channels_first').

    Returns:
      The output tensor of the block; shape should match inputs.
    """
    shortcut = inputs
    inputs = batch_norm(inputs, training, data_format)
    inputs = tf.nn.relu(inputs)

    # The projection shortcut should come after the first batch norm and ReLU
    # since it performs a 1x1 convolution.
    if projection_shortcut is not None:
        shortcut = projection_shortcut(inputs)

    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=filters,
        kernel_size=3,
        strides=strides,
        data_format=data_format,
    )

    inputs = batch_norm(inputs, training, data_format)
    inputs = tf.nn.relu(inputs)
    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=filters,
        kernel_size=3,
        strides=1,
        data_format=data_format,
    )

    return inputs + shortcut
221
+
222
+
223
def _bottleneck_block_v1(
    inputs, filters, training, projection_shortcut, strides, data_format
):
    """A single block for ResNet v1, with a bottleneck.

    Similar to _building_block_v1(), except using the "bottleneck" blocks
    (1x1 -> 3x3 -> 1x1 convolutions) described in:
      Deep Residual Learning for Image Recognition
      https://arxiv.org/pdf/1512.03385.pdf
      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on data_format.
      filters: The number of filters for the convolutions.
      training: A Boolean for whether the model is in training or inference
        mode. Needed for batch normalization.
      projection_shortcut: The function to use for projection shortcuts
        (typically a 1x1 convolution when downsampling the input).
      strides: The block's stride. If greater than 1, this block will ultimately
        downsample the input.
      data_format: The input format ('channels_last' or 'channels_first').

    Returns:
      The output tensor of the block; shape should match inputs.
    """
    shortcut = inputs

    if projection_shortcut is not None:
        shortcut = projection_shortcut(inputs)
        shortcut = batch_norm(
            inputs=shortcut, training=training, data_format=data_format
        )

    # 1x1 reduction convolution.
    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=filters,
        kernel_size=1,
        strides=1,
        data_format=data_format,
    )
    inputs = batch_norm(inputs, training, data_format)
    inputs = tf.nn.relu(inputs)

    # 3x3 spatial convolution; carries the block's stride.
    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=filters,
        kernel_size=3,
        strides=strides,
        data_format=data_format,
    )
    inputs = batch_norm(inputs, training, data_format)
    inputs = tf.nn.relu(inputs)

    # 1x1 expansion convolution back to 4x the bottleneck width.
    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=4 * filters,
        kernel_size=1,
        strides=1,
        data_format=data_format,
    )
    inputs = batch_norm(inputs, training, data_format)
    inputs += shortcut
    inputs = tf.nn.relu(inputs)

    return inputs
290
+
291
+
292
def _bottleneck_block_v2(
    inputs, filters, training, projection_shortcut, strides, data_format
):
    """A single block for ResNet v2, with a bottleneck.

    Similar to _building_block_v2(), except using the "bottleneck" blocks
    (1x1 -> 3x3 -> 1x1 convolutions) described in:
      Deep Residual Learning for Image Recognition
      https://arxiv.org/pdf/1512.03385.pdf
      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.

    Adapted to the ordering conventions of:
      Batch normalization then ReLu then convolution as described by:
      Identity Mappings in Deep Residual Networks
      https://arxiv.org/pdf/1603.05027.pdf
      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on data_format.
      filters: The number of filters for the convolutions.
      training: A Boolean for whether the model is in training or inference
        mode. Needed for batch normalization.
      projection_shortcut: The function to use for projection shortcuts
        (typically a 1x1 convolution when downsampling the input).
      strides: The block's stride. If greater than 1, this block will ultimately
        downsample the input.
      data_format: The input format ('channels_last' or 'channels_first').

    Returns:
      The output tensor of the block; shape should match inputs.
    """
    shortcut = inputs
    inputs = batch_norm(inputs, training, data_format)
    inputs = tf.nn.relu(inputs)

    # The projection shortcut should come after the first batch norm and ReLU
    # since it performs a 1x1 convolution.
    if projection_shortcut is not None:
        shortcut = projection_shortcut(inputs)

    # 1x1 reduction convolution.
    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=filters,
        kernel_size=1,
        strides=1,
        data_format=data_format,
    )

    # 3x3 spatial convolution; carries the block's stride.
    inputs = batch_norm(inputs, training, data_format)
    inputs = tf.nn.relu(inputs)
    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=filters,
        kernel_size=3,
        strides=strides,
        data_format=data_format,
    )

    # 1x1 expansion convolution back to 4x the bottleneck width.
    inputs = batch_norm(inputs, training, data_format)
    inputs = tf.nn.relu(inputs)
    inputs = conv2d_fixed_padding(
        inputs=inputs,
        filters=4 * filters,
        kernel_size=1,
        strides=1,
        data_format=data_format,
    )

    return inputs + shortcut
363
+
364
+
365
def block_layer(
    inputs, filters, bottleneck, block_fn, blocks, strides, training, name, data_format
):
    """Creates one layer of blocks for the ResNet model.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on data_format.
      filters: The number of filters for the first convolution of the layer.
      bottleneck: Is the block created a bottleneck block.
      block_fn: The block to use within the model, either `building_block` or
        `bottleneck_block`.
      blocks: The number of blocks contained in the layer.
      strides: The stride to use for the first convolution of the layer. If
        greater than 1, this layer will ultimately downsample the input.
      training: Either True or False, whether we are currently training the
        model. Needed for batch norm.
      name: A string name for the tensor output of the block layer.
      data_format: The input format ('channels_last' or 'channels_first').

    Returns:
      The output tensor of the block layer.
    """

    # Bottleneck blocks end with 4x the number of filters as they start with
    filters_out = filters * 4 if bottleneck else filters

    def projection_shortcut(inputs):
        # 1x1 convolution that matches the shortcut's shape to the block output.
        return conv2d_fixed_padding(
            inputs=inputs,
            filters=filters_out,
            kernel_size=1,
            strides=strides,
            data_format=data_format,
        )

    # Only the first block per block_layer uses projection_shortcut and strides
    inputs = block_fn(
        inputs, filters, training, projection_shortcut, strides, data_format
    )

    for _ in range(1, blocks):
        inputs = block_fn(inputs, filters, training, None, 1, data_format)

    return tf.identity(inputs, name)
410
+
411
+
412
class Model(object):
    """Base class for building the Resnet Model."""

    def __init__(
        self,
        resnet_size,
        bottleneck,
        num_classes,
        num_filters,
        kernel_size,
        conv_stride,
        first_pool_size,
        first_pool_stride,
        second_pool_size,
        second_pool_stride,
        block_sizes,
        block_strides,
        final_size,
        version=DEFAULT_VERSION,
        data_format=None,
    ):
        """Creates a models_repo for classifying an image.

        Args:
            resnet_size: A single integer for the size of the ResNet models_repo.
            bottleneck: Use regular blocks or bottleneck blocks.
            num_classes: The number of classes used as labels.
            num_filters: Filter count for the first block layer; doubled for
                each subsequent block layer.
            kernel_size: Kernel size of the initial convolution.
            conv_stride: Stride of the initial convolutional layer.
            first_pool_size: Pool size for the first pooling layer, or None to
                skip that layer entirely.
            first_pool_stride: Stride for the first pooling layer (ignored
                when first_pool_size is None).
            second_pool_size: Pool size for the second pooling layer.
            second_pool_stride: Stride for the final pooling layer.
            block_sizes: List with the number of blocks in each set of block
                layers.
            block_strides: Stride for the first convolution of each set of
                block layers; same length as block_sizes.
            final_size: Expected size of the models_repo after the second
                pooling.
            version: ResNet version to use, 1 or 2. See README for details.
            data_format: 'channels_last', 'channels_first', or None. When
                None, 'channels_first' is chosen if TensorFlow was built with
                CUDA support, else 'channels_last'.

        Raises:
            ValueError: if an invalid version is selected.
        """
        self.resnet_size = resnet_size

        if not data_format:
            # NOTE(review): this keys off the TF build (CUDA support), not off
            # a GPU actually being present at runtime.
            data_format = (
                "channels_first" if tf.test.is_built_with_cuda() else "channels_last"
            )

        self.resnet_version = version
        if version not in (1, 2):
            raise ValueError(
                "Resnet version should be 1 or 2. See README for citations."
            )

        # Select the residual-block implementation from (bottleneck, version).
        self.bottleneck = bottleneck
        block_fns = {
            (True, 1): _bottleneck_block_v1,
            (True, 2): _bottleneck_block_v2,
            (False, 1): _building_block_v1,
            (False, 2): _building_block_v2,
        }
        self.block_fn = block_fns[(bool(bottleneck), version)]

        self.data_format = data_format
        self.num_classes = num_classes
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.conv_stride = conv_stride
        self.first_pool_size = first_pool_size
        self.first_pool_stride = first_pool_stride
        self.second_pool_size = second_pool_size
        self.second_pool_stride = second_pool_stride
        self.block_sizes = block_sizes
        self.block_strides = block_strides
        self.final_size = final_size

    def __call__(self, inputs, training):
        """Add operations to classify a batch of input images.

        Args:
            inputs: A Tensor representing a batch of input images.
            training: A boolean. Set to True to add operations required only
                when training the classifier.

        Returns:
            A logits Tensor with shape [<batch_size>, self.num_classes].
        """
        net = inputs
        if self.data_format == "channels_first":
            # Convert NHWC to NCHW: large performance boost on GPU. See
            # https://www.tensorflow.org/performance/performance_guide#data_formats
            net = tf.transpose(net, [0, 3, 1, 2])

        net = conv2d_fixed_padding(
            inputs=net,
            filters=self.num_filters,
            kernel_size=self.kernel_size,
            strides=self.conv_stride,
            data_format=self.data_format,
        )
        net = tf.identity(net, "initial_conv")

        if self.first_pool_size:
            net = tf.layers.max_pooling2d(
                inputs=net,
                pool_size=self.first_pool_size,
                strides=self.first_pool_stride,
                padding="SAME",
                data_format=self.data_format,
            )
            net = tf.identity(net, "initial_max_pool")

        # Each set of block layers doubles the filter count of the previous.
        for index, num_blocks in enumerate(self.block_sizes):
            net = block_layer(
                inputs=net,
                filters=self.num_filters * (2 ** index),
                bottleneck=self.bottleneck,
                block_fn=self.block_fn,
                blocks=num_blocks,
                strides=self.block_strides[index],
                training=training,
                name="block_layer{}".format(index + 1),
                data_format=self.data_format,
            )

        net = batch_norm(net, training, self.data_format)
        net = tf.nn.relu(net)

        # Global average pool via reduce_mean: equivalent to an
        # AveragePooling2D over the whole spatial extent, but faster.
        spatial_axes = [2, 3] if self.data_format == "channels_first" else [1, 2]
        net = tf.reduce_mean(net, spatial_axes, keepdims=True)
        net = tf.identity(net, "final_reduce_mean")

        net = tf.reshape(net, [-1, self.final_size])
        readout_layer = tf.layers.Dense(units=self.num_classes, name="readout_layer")
        net = readout_layer(net)
        return tf.identity(net, "final_dense")
resnet/run.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
#!/usr/bin/env bash
# Launch the training/serving entry point.
# Fix: added a shebang and fail-fast behavior so a failing step is not
# silently ignored by the calling shell.
set -e

echo "Starting Server..."
python ./main.py
thundernet_config.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Default configuration values for Thundernet training scripts."""

from typing import Optional

# Dataset locations (Windows paths used by the original author).
train_path: str = "C:/Users/user/Documents/pruned_training/training/"
val_path: str = "C:/Users/user/Documents/pruned_training/val/"
# Root directory where numbered experiment subfolders are created.
model_dir: str = "C:/Users/user/Documents/Thundernet/models/"
model_weights: str = (
    "C:/Users/user/Documents/Thundernet/model/BS4_lossBCE_weights_lr_0.00013713842558297858_reg-1.1743577101671763e-05-ep-13-val_loss0.11463435739278793-train_loss0.053004469722509384-val_iou0.8959722518920898-train_iou0.9606077075004578.hdf5"
)
batch_size: int = 4
augment: bool = False  # True
# Fix: this holds the random-crop probability, so the annotation is float
# (it was previously annotated bool while assigned 0.05).
rand_crop: float = 0.05
loss: str = "BCE"
# Fix: None is a valid default, so these two are Optional (they were
# annotated as plain list / str while assigned None).
weights: Optional[list] = None  # [0.56, 3.27]
classes: int = 2
pretrained_bool: bool = False
# (sic) "weigths" misspelling kept: the trainers reference this exact name.
pretrained_weigths: Optional[str] = None
lr: float = 1e-4
epochs: int = 15
resolution: str = "640x480"
kernel_regularizer: float = 2e-4
train_config.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from data_gen import DataGenerator
3
+ from os import listdir
4
+ from utils import (
5
+ iou,
6
+ PlotLosses,
7
+ dice_loss,
8
+ focal_loss,
9
+ categorical_loss,
10
+ categorical_focal_loss,
11
+ resolution2framesize3cha,
12
+ resolution2framesize,
13
+ bce_loss,
14
+ )
15
+ import matplotlib.pyplot as plt
16
+ import tensorflow as tf
17
+
18
+ tf.config.run_functions_eagerly(True)
19
+ # from keras.backend.tensorflow_backend import set_session
20
+ import argparse
21
+ import sys
22
+ import numpy as np
23
+ import thundernet_config as Thundernet_config
24
+ from datetime import datetime
25
+ from matplotlib import pyplot as plt
26
+
27
+ from model.model import Thundernet as Thundernet_original
28
+ from model.model_ppm_factors import Thundernet as Thundernet_ppm
29
+
30
+ from pathlib import Path
31
+ from collections import defaultdict
32
+ import copy
33
+
34
+ plt.switch_backend("agg")
35
+
36
# Command-line interface; defaults come from thundernet_config.
parser = argparse.ArgumentParser()

parser.add_argument(
    "--train_dir",
    type=str,
    default=Thundernet_config.train_path,
    help="The directory containing the train image dataset.",
)

parser.add_argument(
    "--val_dir",
    type=str,
    default=Thundernet_config.val_path,
    help="The directory containing the validation image dataset.",
)

parser.add_argument(
    "--batch_size",
    type=int,
    default=Thundernet_config.batch_size,
    choices=[1, 2, 4, 8, 16],
    help="Batch size used for training Thundernet",
)

# Fix: argparse's type=bool treats ANY non-empty string (including "False")
# as True. Parse the flag text explicitly instead.
parser.add_argument(
    "--augment",
    type=lambda s: str(s).lower() in ("true", "1", "yes"),
    default=Thundernet_config.augment,
    choices=[False, True],
    help="Whether to use color augmentation for training Thundernet.",
)

parser.add_argument(
    "--rand_crop",
    type=float,
    default=Thundernet_config.rand_crop,
    choices=[0, 0.02, 0.05, 0.1, 0.2, 0.5],
    help="Frequency of random crop data augmentation technique.",
)

parser.add_argument(
    "--loss",
    type=str,
    default=Thundernet_config.loss,
    choices=["BCE", "BFL", "CFL", "DCL", "FTL", "CAT"],
    help="Loss function to be used - Binary Cross Entropy (BCE), Focal Loss (FL) , Dice Coefficient Loss (DCL) and Focal Tversky Loss (FTL)",
)

parser.add_argument(
    "--model_dir",
    type=str,
    default=Thundernet_config.model_dir,
    help="Base directory for the models_repo. "
    "Make sure 'model_checkpoint_path' given in 'checkpoint' file matches "
    "with checkpoint name.",
)

# Fix: type=dict cannot parse a command-line string (dict("...") raises).
# Accept one float per class instead, e.g. --weights 0.56 3.27; the default
# (None) is passed through untouched.
parser.add_argument(
    "--weights",
    type=float,
    nargs="+",
    default=Thundernet_config.weights,
    help="Class weights used for Weighted Binary Cross Entropy Loss.",
)

parser.add_argument(
    "--lr", type=float, default=Thundernet_config.lr, help="Learning Rate."
)

parser.add_argument(
    "--epochs", type=int, default=Thundernet_config.epochs, help="Epochs"
)

# Fix: help text previously said "Epochs" for this argument too.
parser.add_argument(
    "--classes",
    type=int,
    default=Thundernet_config.classes,
    help="Number of output classes.",
)

parser.add_argument(
    "--resolution",
    type=str,
    default=Thundernet_config.resolution,
    help="Input Resolution",
)

parser.add_argument(
    "--kernel_regularizer",
    type=float,
    default=Thundernet_config.kernel_regularizer,
    help="kernel_regularizer",
)

# Fix: same type=bool pitfall as --augment.
parser.add_argument(
    "--pretrained",
    type=lambda s: str(s).lower() in ("true", "1", "yes"),
    default=Thundernet_config.pretrained_bool,
    help="In case you want to train",
)

# (sic) "weigths" misspelling kept for CLI/backward compatibility.
parser.add_argument(
    "--pretrained_weigths",
    type=str,
    default=Thundernet_config.pretrained_weigths,
    help="In case you want to train",
)
141
def main(
    args: list,
    transformations: tuple = tuple(),
    model: str = "original",
    class_mappings: dict = None,
):
    """
    Train the model.

    Args:
        args (list): command-line arguments to parse (e.g. sys.argv[1:]).
        transformations (tuple): transformations to execute on the data.
            Default: tuple().
        model (str): type of model, "original" or "ppm". Default: "original".
        class_mappings (dict): class mapper; when given, the number of
            classes is derived from it. Default: None.

    Returns:
        None

    Raises:
        ValueError: for an unknown model name or an unsupported loss key.
    """
    # Fix: parse_args returns an argparse.Namespace (was annotated `list`).
    FLAGS = parser.parse_args(args)

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # use id from $ nvidia-smi

    # NOTE(review): these listings are not used below; they serve as a
    # fail-fast check that the dataset directories exist and contain jpgs.
    mypath_train = FLAGS.train_dir + "images/"
    label_path_train = FLAGS.train_dir + "labels/"
    list_IDs_train = [f[:-4] for f in listdir(mypath_train) if f[-4:] == ".jpg"]
    mypath_val = FLAGS.val_dir + "images/"
    label_path_val = FLAGS.val_dir + "labels/"
    list_IDs_val = [f[:-4] for f in listdir(mypath_val) if f[-4:] == ".jpg"]

    # First we assure that the dir for saving the experiments is created.
    os.makedirs(FLAGS.model_dir, exist_ok=True)

    # For every trial of the same experiment we create a new numbered
    # subfolder (first k for which the directory does not exist yet).
    k = 1
    while True:
        model_dir = FLAGS.model_dir + str(k) + "/"
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
            break
        k += 1

    # Model variant selection.
    if model == "original":
        Thundernet = Thundernet_original
    elif model == "ppm":
        Thundernet = Thundernet_ppm
    else:
        raise ValueError(f"Unknown model: {model}")

    # Derive the class count from the mapping (+1 for the background class).
    if class_mappings is not None:
        FLAGS.classes = len(set(class_mappings.values())) + 1

    # Write the run configuration in model_dir.
    # Fix: use a context manager and avoid shadowing the `file` builtin.
    with open(model_dir + "config.txt", "w") as cfg:
        cfg.write("Experiment num " + str(k) + "\n")
        cfg.write("Fecha=" + str(datetime.now()) + "\n")
        cfg.write("Train with=" + FLAGS.train_dir + "\n")
        cfg.write("Val with=" + FLAGS.val_dir + "\n")
        cfg.write("Input Resoltuion with=" + FLAGS.resolution + "\n")
        cfg.write("Batch Size=" + str(FLAGS.batch_size) + "\n")
        cfg.write("Batch augment=" + str(FLAGS.augment) + "\n")
        cfg.write("Rand Crop=" + str(FLAGS.rand_crop) + "\n")
        cfg.write("Loss=" + FLAGS.loss + "\n")
        cfg.write("Model dir=" + FLAGS.model_dir + "\n")
        cfg.write("weights=" + str(FLAGS.weights) + "\n")
        cfg.write("lr=" + str(FLAGS.lr) + "\n")
        cfg.write("epochs=" + str(FLAGS.epochs) + "\n")
        cfg.write("classes=" + str(FLAGS.classes) + "\n")
        cfg.write("kernel_regularizer=" + str(FLAGS.kernel_regularizer) + "\n")
        cfg.write("pretrained=" + str(FLAGS.pretrained) + "\n")
        cfg.write("pretrained_weigths=" + str(FLAGS.pretrained_weigths) + "\n")
        cfg.write("Class mappings=" + str(class_mappings) + "\n")
        cfg.write("Model=" + model + "\n")
        cfg.write(f"Transformations: {transformations}\n")
        cfg.write("Comentarios=" + "" + "\n")

    print(
        "resolution2framesize3cha(FLAGS.resolution) ",
        resolution2framesize3cha(FLAGS.resolution),
    )
    thundernet = Thundernet(
        input_shape=resolution2framesize3cha(FLAGS.resolution),
        n_classes=FLAGS.classes,
        resnet_trainable=True,
        kernel_regularizer=FLAGS.kernel_regularizer,
    )

    if FLAGS.pretrained:
        print("loading weights from", FLAGS.pretrained_weigths)
        thundernet.model.load_weights(
            FLAGS.pretrained_weigths, by_name=True, skip_mismatch=True
        )

    lr = FLAGS.lr
    opt = tf.keras.optimizers.Adam(learning_rate=lr)  # for keras 2.6.0

    if not model_dir.endswith(os.path.sep):
        model_dir += os.path.sep

    callbacks = [
        PlotLosses(model_dir),
        tf.keras.callbacks.ModelCheckpoint(
            filepath=os.path.normpath(
                os.path.join(
                    model_dir,
                    f"BS{FLAGS.batch_size}_loss{FLAGS.loss}_weights_lr_{lr}_reg-{FLAGS.kernel_regularizer}-ep-{{epoch}}-val_loss{{val_loss}}-train_loss{{loss}}-val_iou{{val_iou}}-train_iou{{iou}}.hdf5",
                )
            ),
            save_best_only=True,
            save_weights_only=True,
        ),
    ]

    # Loss selection.
    if FLAGS.loss == "BCE":
        loss = bce_loss()
    elif FLAGS.loss == "BFL":
        loss = focal_loss()
    elif FLAGS.loss == "DCL":
        loss = dice_loss()
    elif FLAGS.loss == "CFL":
        loss = categorical_focal_loss()
    elif FLAGS.loss == "CAT":
        loss = categorical_loss()
    else:
        # Fix: "FTL" is accepted by the CLI but had no branch here, which
        # left `loss` unbound and crashed at compile() with a NameError.
        raise ValueError(f"Unsupported loss: {FLAGS.loss}")

    thundernet.model.compile(loss=loss, optimizer=opt, metrics=[iou])

    dataset_dir = Path(Thundernet_config.train_path).parent

    training_generator, validation_generator = DataGenerator.create_generators(
        dataset_dir,
        FLAGS.classes,
        training_batch_size=Thundernet_config.batch_size,
        validation_batch_size=Thundernet_config.batch_size,
        to_stereo=False,
        transformations=transformations,
        class_mappings=class_mappings,
    )

    # Both branches of the original if/else assigned the same value.
    weights = FLAGS.weights

    # Fix: fit_generator is deprecated in tf.keras 2.x; Model.fit accepts
    # generators directly with the same keyword arguments.
    history = thundernet.model.fit(
        training_generator,
        validation_data=validation_generator,
        callbacks=callbacks,
        use_multiprocessing=False,
        workers=6,
        epochs=FLAGS.epochs,
        class_weight=None,
    )
303
+
304
+
305
+ if __name__ == "__main__":
306
+
307
+ main(sys.argv[1:], model="original", class_mappings=defaultdict(int, {1: 1}))
308
+ # main(sys.argv[1:], model="ppm", class_mappings=defaultdict(int, {1: 1}))
309
+ # main(sys.argv[1:], model='original', class_mappings=defaultdict(int, {1: 1, 2: 2, 5: 3})) # In case you also want to segment two specific type of objects (original class_id=2 and class_id=5)
310
+ # main(sys.argv[1:], model='ppm', class_mappings=defaultdict(int, {1: 1, 2: 2, 5: 2})) # In case you want to treat both objects as the same class
311
+
train_optuna.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Fix: the original preamble imported nearly every module twice (os, optuna,
# DataGenerator, the utils names, datetime, pyplot, copy, defaultdict and the
# model variants). Deduplicated and regrouped stdlib / third-party / local;
# every previously imported name is still bound, and the two side-effect
# calls are kept in their original relative order.
import argparse
import copy
import os
import sys
from collections import defaultdict
from datetime import datetime
from os import listdir
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import optuna
import tensorflow as tf

import thundernet_config as Thundernet_config
from data_gen import DataGenerator
from model.model import Thundernet as Thundernet_original
from models_repo.model_attention import Thundernet as Thundernet_attention
from models_repo.model_attention_2 import Thundernet as Thundernet_attention2
from models_repo.model_ppm_factors import Thundernet as Thundernet_ppm
from utils import (
    PlotLosses,
    bce_loss,
    categorical_focal_loss,
    categorical_loss,
    dice_loss,
    focal_loss,
    iou,
    resolution2framesize,
    resolution2framesize3cha,
)

# Run tf.functions eagerly (eases debugging of custom losses/metrics).
tf.config.run_functions_eagerly(True)

# Headless plotting backend for training servers.
plt.switch_backend("agg")
70
+
71
def objective(trial):
    """Optuna objective: train for one epoch and return the mean IoU.

    Search space: batch size (categorical), learning rate and kernel
    regularizer strength (both sampled log-uniformly).

    Args:
        trial: optuna.trial.Trial supplied by the study.

    Returns:
        float: mean training IoU (maximized by the study).
    """
    batch_size = trial.suggest_categorical("batch_size", [1, 2, 4])
    # Fix: suggest_loguniform is deprecated; suggest_float(..., log=True)
    # is the supported equivalent.
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    kernel_regularizer = trial.suggest_float(
        "kernel_regularizer", 1e-5, 1e-2, log=True
    )

    # Call the main function with trial parameters.
    return main(
        model="ppm",  # tune the 'ppm' variant
        class_mappings=defaultdict(int, {1: 1}),
        batch_size=batch_size,
        lr=lr,
        kernel_regularizer=kernel_regularizer,
        epochs=1,  # single epoch per trial keeps the search fast
        loss="BCE",
        transformations=(),  # add transformations as needed
    )
88
+
89
+
90
def main(
    model="original",
    class_mappings=None,
    batch_size=8,
    lr=1e-4,
    kernel_regularizer=0.001,
    epochs=1,
    loss="BCE",
    transformations=tuple(),
):
    """Build, compile and train a Thundernet variant for one Optuna trial.

    Args:
        model (str): "original", "attention", "attention2" or "ppm".
        class_mappings (dict): maps original label ids to training ids.
        batch_size (int): training batch size.
        lr (float): Adam learning rate.
        kernel_regularizer (float): regularization factor for the model.
        epochs (int): number of training epochs.
        loss (str): one of "BCE", "BFL", "DCL", "CFL", "CAT".
        transformations (tuple): data transformations to apply.

    Returns:
        float: mean training IoU over the run (the value Optuna maximizes).

    Raises:
        ValueError: for an unknown model name or an unsupported loss key.
    """
    # Collect the run configuration in a Namespace, mirroring the CLI trainer.
    FLAGS = argparse.Namespace(
        train_dir=Thundernet_config.train_path,
        val_dir=Thundernet_config.val_path,
        batch_size=batch_size,
        augment=Thundernet_config.augment,
        rand_crop=Thundernet_config.rand_crop,
        loss=loss,
        model_dir=Thundernet_config.model_dir,
        weights=Thundernet_config.weights,
        lr=lr,
        epochs=epochs,
        classes=Thundernet_config.classes,
        resolution=Thundernet_config.resolution,
        kernel_regularizer=kernel_regularizer,
        pretrained=Thundernet_config.pretrained_bool,
        pretrained_weigths=Thundernet_config.pretrained_weigths,
    )

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # NOTE(review): these listings are not used below; they serve as a
    # fail-fast check that the dataset directories exist and contain jpgs.
    mypath_train = FLAGS.train_dir + "images/"
    label_path_train = FLAGS.train_dir + "labels/"
    list_IDs_train = [f[:-4] for f in listdir(mypath_train) if f[-4:] == ".jpg"]
    mypath_val = FLAGS.val_dir + "images/"
    label_path_val = FLAGS.val_dir + "labels/"
    list_IDs_val = [f[:-4] for f in listdir(mypath_val) if f[-4:] == ".jpg"]

    # Model Setup
    if model == "original":
        Thundernet = Thundernet_original
    elif model == "attention":
        Thundernet = Thundernet_attention
    elif model == "attention2":
        Thundernet = Thundernet_attention2
    elif model == "ppm":
        Thundernet = Thundernet_ppm
    else:
        raise ValueError(f"Unknown model: {model}")

    # Model directory setup
    model_dir = FLAGS.model_dir
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    thundernet = Thundernet(
        input_shape=resolution2framesize3cha(FLAGS.resolution),
        n_classes=FLAGS.classes,
        resnet_trainable=True,
        kernel_regularizer=FLAGS.kernel_regularizer,
    )

    if FLAGS.pretrained:
        thundernet.model.load_weights(
            FLAGS.pretrained_weigths, by_name=True, skip_mismatch=True
        )

    # Optimizer setup
    opt = tf.keras.optimizers.Adam(learning_rate=FLAGS.lr)

    # Loss selection. Fix: an unknown key previously left `loss` unbound and
    # crashed later at compile() with a NameError — fail loudly instead.
    if FLAGS.loss == "BCE":
        loss = bce_loss()
    elif FLAGS.loss == "BFL":
        loss = focal_loss()
    elif FLAGS.loss == "DCL":
        loss = dice_loss()
    elif FLAGS.loss == "CFL":
        loss = categorical_focal_loss()
    elif FLAGS.loss == "CAT":
        loss = categorical_loss()
    else:
        raise ValueError(f"Unsupported loss: {FLAGS.loss}")

    thundernet.model.compile(loss=loss, optimizer=opt, metrics=[iou])

    # Data generators setup
    dataset_dir = Path(Thundernet_config.train_path).parent
    training_generator, validation_generator = DataGenerator.create_generators(
        dataset_dir,
        FLAGS.classes,
        training_batch_size=FLAGS.batch_size,
        to_stereo=False,
        transformations=transformations,
        class_mappings=class_mappings,
    )

    # Train the model
    history = thundernet.model.fit(
        training_generator,
        validation_data=validation_generator,
        epochs=FLAGS.epochs,
        class_weight=None,
        callbacks=[PlotLosses(model_dir)],
        use_multiprocessing=False,
        workers=6,
    )
    print(history)
    # Fix (comment): this returns the mean training IoU, not a validation
    # loss — the study is configured with direction="maximize".
    return np.mean(history.history["iou"])
200
+
201
+
202
# Optuna study setup
if __name__ == "__main__":
    # Maximize the mean IoU returned by objective() (the original comment
    # incorrectly said "minimize the validation loss").
    study = optuna.create_study(
        direction="maximize", storage="sqlite:///db.sqlite3"
    )
    study.optimize(objective, n_trials=100)  # run 100 trials
    print("Best hyperparameters found: ", study.best_params)

    import optuna.visualization as vis

    # Save the parameter-importance plot.
    fig = vis.plot_param_importances(study)
    fig.write_image("param_importance_IoU.png")

    # Save the optimization-history plot.
    fig = vis.plot_optimization_history(study)
    fig.write_image("optimization_history_IoU.png")

    import pandas as pd

    # Dump the full trial table for offline analysis.
    df = study.trials_dataframe()
    df.to_csv("results_optuna_IoU.csv")

    # Fix: df["value"] is the objective (mean IoU), not a loss — the titles
    # and axis labels now say so. Output filenames are kept unchanged so
    # downstream tooling keeps working.
    plt.figure(figsize=(8, 6))
    plt.scatter(df["params_lr"], df["value"], color="blue", alpha=0.7)
    plt.title("Learning Rate vs Objective (mean IoU)")
    plt.xlabel("Learning Rate")
    plt.ylabel("Objective (mean IoU)")
    plt.grid(True)
    plt.savefig("lr_vs_loss_IoU.png")
    plt.close()

    plt.figure(figsize=(8, 6))
    plt.scatter(df["params_batch_size"], df["value"], color="green", alpha=0.7)
    plt.title("Batch size vs Objective (mean IoU)")
    plt.xlabel("Batch size")
    plt.ylabel("Objective (mean IoU)")
    plt.grid(True)
    plt.savefig("batch_size_vs_loss_IoU.png")
    plt.close()

    plt.figure(figsize=(8, 6))
    plt.scatter(df["params_kernel_regularizer"], df["value"], color="red", alpha=0.7)
    plt.title("Kernel regularizer vs Objective (mean IoU)")
    plt.xlabel("Kernel regularizer")
    plt.ylabel("Objective (mean IoU)")
    plt.grid(True)
    plt.savefig("kernel_regularizer_vs_loss_IoU.png")
    plt.close()
utils.py ADDED
@@ -0,0 +1,505 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from tensorflow.keras import backend as K
3
+ import tensorflow.keras as keras
4
+ import math
5
+ from matplotlib import pyplot as plt
6
+ import cv2
7
+ import time
8
+ import scipy
9
+ from os import listdir
10
+ from IPython.display import clear_output
11
+ import segmentation_models as sm
12
+ from PIL import Image
13
+ import images_toolkit as tlk
14
+
15
+
16
def dice_coef(y_true, y_pred, smooth=1):
    """Compute the smoothed Dice coefficient between two (soft) masks."""
    flat_true = K.flatten(y_true)
    flat_pred = K.flatten(y_pred)
    overlap = K.sum(flat_true * flat_pred)
    denominator = K.sum(flat_true) + K.sum(flat_pred) + smooth
    return (2.0 * overlap + smooth) / denominator
21
+
22
+
23
def dice_loss(alpha=1):
    """Build a Dice loss callable: 1 - alpha * dice_coef(y_true, y_pred).

    The inner function keeps the name `dice_coef_loss` so Keras logs/metrics
    display it unchanged.
    """

    def dice_coef_loss(y_true, y_pred):
        return 1 - alpha * dice_coef(y_true, y_pred)

    return dice_coef_loss
28
+
29
+
30
def categorical_loss():
    """Build a categorical cross-entropy loss callable."""
    cce = keras.losses.CategoricalCrossentropy()

    def categorical(y_true, y_pred):
        return cce(y_true, y_pred)

    return categorical
35
+
36
+
37
def bce_loss():
    """Build a binary cross-entropy loss callable."""
    bce_fn = keras.losses.BinaryCrossentropy()

    def bce(y_true, y_pred):
        return bce_fn(y_true, y_pred)

    return bce
42
+
43
+
44
def tversky(y_true, y_pred, smooth=1, alpha=0.7):
    """Smoothed Tversky index; alpha weights false negatives vs false positives."""
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    tp = K.sum(truth * pred)            # true positives
    fn = K.sum(truth * (1 - pred))      # false negatives
    fp = K.sum((1 - truth) * pred)      # false positives
    denominator = tp + alpha * fn + (1 - alpha) * fp + smooth
    return (tp + smooth) / denominator
53
+
54
+
55
def tversky_loss(y_true, y_pred):
    """Tversky loss: complement of the Tversky index (default smooth/alpha)."""
    return 1 - tversky(y_true, y_pred)
57
+
58
+
59
+ # def focal_tversky_loss(y_true, y_pred, gamma=0.75):
60
+ # tv = tversky(y_true, y_pred)
61
+ # return K.pow((1 - tv), gamma)
62
+
63
+
64
def categorical_focal_loss(gamma=2.0, alpha=0.25):
    """Build a categorical focal loss callable backed by segmentation_models.

    NOTE(review): this mutates gamma/alpha attributes on the shared
    sm.losses.categorical_focal_loss object on every call — whether that
    actually parameterizes the loss depends on the sm API; confirm.
    """

    def cate_focal_loss(y_true, y_pred):
        loss_fn = sm.losses.categorical_focal_loss
        loss_fn.gamma = gamma
        loss_fn.alpha = alpha
        return loss_fn(y_true, y_pred)

    return cate_focal_loss
72
+
73
+
74
def focal_loss(gamma=2.0, alpha=0.7):
    """Build a focal Tversky loss callable: (1 - tversky)**gamma.

    Bug fix: `alpha` was previously passed positionally into tversky(), where
    it landed in the `smooth` parameter (signature: tversky(y_true, y_pred,
    smooth=1, alpha=0.7)), so the intended alpha was silently ignored and the
    smoothing term was changed instead. It is now passed by keyword.
    """

    def focal_tversky_loss(y_true, y_pred):
        tv = tversky(y_true, y_pred, alpha=alpha)
        return K.pow((1 - tv), gamma)

    return focal_tversky_loss
80
+
81
+
82
def single_iou(y_true, y_pred, label: int):
    """
    Return the Intersection over Union (IoU) for a given label.

    Args:
        y_true: the expected y values as a one-hot
        y_pred: the predicted y values as a one-hot or softmax output
        label: the label to return the IoU for
    Returns:
        the IoU for the given label (1.0 when the union is empty)
    """
    # argmax over the channel axis, then compare predictions/truths to the
    # target label to get per-pixel binary masks
    y_true = K.cast(K.equal(K.argmax(y_true), label), K.floatx())
    y_pred = K.cast(K.equal(K.argmax(y_pred), label), K.floatx())

    # |intersection| (AND) and |union| (OR) of the label masks
    intersection = K.sum(y_true * y_pred)
    union = K.sum(y_true) + K.sum(y_pred) - intersection
    # Avoid divide-by-zero: an empty union counts as a perfect match.
    # Fix: removed a dead duplicate computation (`a = K.switch(...)`) and the
    # commented-out hard-coded label-1 variant.
    return K.switch(K.equal(union, 0), 1.0, intersection / union)
107
+
108
+
109
def iou(y_true, y_pred):
    """
    Return the Intersection over Union (IoU) score.

    Args:
        y_true: the expected y values as a one-hot
        y_pred: the predicted y values as a one-hot or softmax output
    Returns:
        the scalar IoU value (mean over all labels)
    """
    # number of labels = size of the last (channel) axis
    num_labels = K.int_shape(y_pred)[-1]
    # accumulator for the per-label IoUs
    total_iou = K.variable(0)
    for label in range(num_labels):
        total_iou = total_iou + single_iou(y_true, y_pred, label)
    # Fix: removed a dead duplicate computation (`a = total_iou / num_labels`).
    return total_iou / num_labels
128
+
129
+
130
def simple_iou(gt, pred):
    """Compute IoU for a binary classified image. Input shapes: (h, w).

    Returns 0.0 when the union is empty (0/0).

    Bug fix: the original called np.nan_to_num(x, 0), where the second
    positional argument is `copy`, not the NaN replacement — it relied on
    the default nan=0.0 by accident. The replacement value is now explicit,
    and the expected 0/0 RuntimeWarning is suppressed.
    """
    intersection = np.sum((gt == 1) & (pred == 1))
    union = np.sum((gt == 1) | (pred == 1))
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.nan_to_num(intersection / union, nan=0.0)
135
+
136
+
137
def simple_iou_for_multiple_classes(gt, pred, n_classes):
    """Per-class IoU for a categorically classified image pair.

    Args:
        gt, pred: integer label maps of shape (h, w), values in [0, n_classes).
        n_classes: number of classes.

    Returns:
        np.ndarray of shape (n_classes,) with one IoU per class. When
        n_classes > 3, one extra entry is appended: the IoU obtained by
        merging every class id >= 2 into a single "object" class, giving
        shape (n_classes + 1,). A class absent from both images yields
        nan (0/0) in its slot.

    NOTE(review): the original docstring claimed the merged group was
    classes >= 3 and that the return had shape (h, w, n_classes); the
    implementation merges ids >= 2 and returns a 1-D vector. Documented
    as implemented — confirm the intended grouping with callers.
    """
    assert gt.shape == pred.shape and gt.ndim == 2
    assert np.max(gt) < n_classes and np.max(pred) < n_classes

    # One-hot encode both label maps: (h*w, n_classes) indicator matrices.
    f_gt = gt.flatten()
    f_pred = pred.flatten()
    gt_matrix = np.zeros((f_gt.size, n_classes), dtype=int)
    pred_matrix = gt_matrix.copy()
    gt_matrix[np.arange(f_gt.size), f_gt] = 1
    pred_matrix[np.arange(f_gt.size), f_pred] = 1
    # Per-class intersection/union over the pixel axis.
    intersections = np.sum((gt_matrix == 1) & (pred_matrix == 1), axis=0)
    unions = np.sum((gt_matrix == 1) | (pred_matrix == 1), axis=0)
    ious = intersections / unions
    if n_classes > 3:
        # IoU of all "object" classes (id >= 2) merged into one mask.
        gt_as_one = f_gt >= 2
        pred_as_one = f_pred >= 2
        iou_as_one = np.sum(gt_as_one & pred_as_one) / np.sum(gt_as_one | pred_as_one)
        return np.append(ious, iou_as_one)
    else:
        return ious
163
+
164
+
165
def add_mask(image, mask):
    """Overlay a binary mask on a BGR image as a green-boosted RGBA image.

    Args:
        image: 3-channel BGR image.
        mask: single-channel mask (0/1); foreground pixels get the green
            channel boosted by 255 (clipped) and become fully opaque.

    Returns:
        (rgba_image, alpha_channel) where alpha_channel is mask * 255 as uint8.
    """
    blue, green, red = cv2.split(image)

    # mask scaled to the 0..255 alpha range; float64 for the clipped addition
    alpha = (mask * 255).astype(np.float64)

    boosted_green = np.clip(np.add(alpha, green), 0, 255).astype(np.uint8)

    alpha = alpha.astype(np.uint8)
    bgra = cv2.merge((blue, boosted_green, red, alpha))
    rgba = cv2.cvtColor(bgra, cv2.COLOR_BGRA2RGBA)

    return rgba, alpha
179
+
180
+
181
def resolution2framesize3cha(resolution):
    """Convert a "<width>x<height>" string to a 3-channel frame shape.

    Generalized from a fixed nine-entry if-chain: any "WxH" string is now
    accepted, and the results for the original nine inputs are unchanged.

    Args:
        resolution: resolution string such as "640x480".

    Returns:
        tuple (height, width, 3).

    Raises:
        ValueError: if the string is not "<int>x<int>" (the original raised
            an UnboundLocalError for unknown strings).
    """
    width, sep, height = resolution.partition("x")
    if sep != "x" or not width.isdigit() or not height.isdigit():
        raise ValueError("invalid resolution string: %r" % (resolution,))
    return (int(height), int(width), 3)
201
+
202
+
203
def resolution2framesize(resolution):
    """Convert a "<width>x<height>" string to a (height, width) frame size.

    Generalized from a fixed nine-entry if-chain: any "WxH" string is now
    accepted, and the results for the original nine inputs are unchanged.

    Args:
        resolution: resolution string such as "640x480".

    Returns:
        tuple (height, width).

    Raises:
        ValueError: if the string is not "<int>x<int>" (the original raised
            an UnboundLocalError for unknown strings).
    """
    width, sep, height = resolution.partition("x")
    if sep != "x" or not width.isdigit() or not height.isdigit():
        raise ValueError("invalid resolution string: %r" % (resolution,))
    return (int(height), int(width))
223
+
224
+
225
def webcam_test(model):
    """Run live segmentation on webcam device 2 and display a green overlay.

    Captures frames until the camera fails or 'q' is pressed. Assumes the
    camera delivers 480x640 BGR frames (the reshape below hard-codes it).

    Args:
        model: Keras model whose predict() returns per-class score maps of
            shape (1, h, w, num_classes).
    """
    cap = cv2.VideoCapture(2)
    try:
        while True:
            # Capture a frame from the camera.
            ret, frame = cap.read()
            # BUG FIX: the original printed frame.shape before checking ret,
            # which raises AttributeError (frame is None) on a failed read.
            if not ret:
                break
            print(frame.shape)

            frame = np.array(frame) / 255.0
            x = np.reshape(frame, (1, 480, 640, 3))

            start_t = time.time()
            pred = model.predict(x)
            duration = time.time() - start_t
            pred = np.argmax(pred[0, :, :, :], 2)
            print(pred.shape)

            # BUG FIX: add_mask returns (overlay, alpha); the original passed
            # the whole tuple to cv2.imshow, which fails at runtime.
            overlap, _ = add_mask(frame, pred)

            print(duration)

            cv2.imshow("Overlap", overlap)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the camera even on error; the original leaked the handle.
        cap.release()
        cv2.destroyAllWindows()
260
+
261
+
262
def image_test(model, img_dir, img_num, label_dir=None):
    """Run the model on one image from a directory, display and save results.

    Args:
        model: Keras model whose predict() returns per-class score maps.
        img_dir: directory (with trailing separator) containing ".jpg" images.
        img_num: index into the (unsorted) directory listing.
        label_dir: optional directory with matching ".png" label images to
            display and save alongside the prediction.
    """
    list_IDs = [f[:-4] for f in listdir(img_dir) if f[-4:] == ".jpg"]
    img_path = img_dir + list_IDs[img_num] + ".jpg"
    test_img = cv2.imread(img_path) / 255.0
    test_img = np.reshape(test_img, (1, test_img.shape[0], test_img.shape[1], 3))

    pred = model.predict(test_img)[0, :, :, :]
    predict = np.argmax(pred, 2)

    # BUG FIX: add_mask returns (overlay, alpha); the original passed the
    # whole tuple to cv2.imshow and cv2.imwrite, which fails at runtime.
    overlapping, _ = add_mask(test_img[0, :, :, :], predict)

    cv2.imshow("Prediction", overlapping)
    cv2.imwrite(
        "./models_repo/frozen_resnet/Trial11/prediction_" + str(img_num) + ".png",
        overlapping,
    )

    if label_dir is not None:
        lab = label_dir + list_IDs[img_num] + ".png"

        # scale the 0/1 label mask up to the visible 0..255 range
        lab_img = np.array(cv2.imread(lab) * 255)
        cv2.imshow("Label", lab_img)
        # NOTE(review): predictions are written to Trial11 but labels to
        # Trial10 — looks unintentional; confirm before changing the path.
        cv2.imwrite(
            "./models_repo/frozen_resnet/Trial10/label_" + str(img_num) + ".png",
            lab_img,
        )

    cv2.waitKey(0)
293
+
294
+
295
class PlotLosses(keras.callbacks.Callback):
    """Keras callback that tracks loss/IoU and writes plot images to disk.

    Every 50 batches it saves running-average training curves
    ("training_loss.png", "training_iou.png"); at each epoch end it saves
    train/validation loss and IoU curves ("loss_curve.png",
    "mean_iou_curve.png") under ``out_dir``. Assumes ``out_dir`` ends with a
    path separator (paths are built by plain string concatenation) and that
    the model reports metrics named "loss"/"iou" (and "val_loss"/"val_iou").
    """

    def __init__(self, out_dir):
        # Directory prefix where all plot PNGs are written.
        self.out_dir = out_dir

    def on_train_begin(self, logs={}):
        """Initialise every accumulator at the start of training."""
        self.i = 0  # epoch counter
        self.x = []  # epoch indices (x-axis of the epoch-level plots)
        self.losses = []  # per-epoch training loss
        self.val_losses = []  # per-epoch validation loss
        # self.fig_loss = plt.figure()
        self.train_iou = []  # per-epoch training IoU
        self.val_iou = []  # per-epoch validation IoU
        # self.fig_iou = plt.figure()

        self.live_loss = []  # running-average loss, sampled every 50 batches
        self.fig_livel = plt.figure()

        self.live_iou = []  # running-average IoU, sampled every 50 batches
        self.fig_livei = plt.figure()

        self.logs = []
        self.live_logs = []
        self.b = 0  # batch counter within the current epoch (reset each epoch)
        self.x_b = []  # global batch indices for the live plots
        self.loss = 0  # loss accumulated over the current epoch
        self.iou = 0  # IoU accumulated over the current epoch
        self.num = 0  # global batch counter (never reset)

    def on_batch_end(self, batch, logs={}):
        """Accumulate batch metrics; every 50 batches redraw the live plots."""
        self.iou += logs.get("iou")
        self.loss += logs.get("loss")
        self.num += 1
        if self.b % 50 == 0:
            self.x_b.append(self.num)
            # within-epoch running averages: b+1 batches seen so far this epoch
            self.live_loss.append(self.loss / float(self.b + 1))
            self.live_iou.append(self.iou / float(self.b + 1))
            clear_output(wait=True)
            plt.ioff()
            # fixed figure numbers 1/2 are reused across calls on purpose
            fig1 = plt.figure(1)
            plt.ioff()
            plt.plot(self.x_b, self.live_loss, label="Training loss")
            plt.title("Training loss")
            plt.xlabel("Iteration")
            plt.ylabel("Loss")
            plt.savefig(self.out_dir + "training_loss.png")
            plt.close(fig1)
            clear_output(wait=True)
            fig2 = plt.figure(2)
            plt.plot(self.x_b, self.live_iou, label="Training iou")
            plt.title("Training IoU")
            plt.xlabel("Iteration")
            plt.ylabel("IoU")
            plt.savefig(self.out_dir + "training_iou.png")
            plt.close(fig2)
        self.b += 1

    def on_epoch_end(self, epoch, logs={}):
        """Reset per-epoch accumulators and redraw the epoch-level curves."""
        self.loss = 0
        self.iou = 0
        self.b = 0
        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get("loss"))
        self.val_losses.append(logs.get("val_loss"))
        self.i += 1
        self.train_iou.append(logs.get("iou"))
        self.val_iou.append(logs.get("val_iou"))
        plt.ioff()
        fig3 = plt.figure(3)
        clear_output(wait=True)
        plt.plot(self.x, self.losses, label="loss")
        plt.plot(self.x, self.val_losses, label="val_loss")
        plt.title("Loss curve")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        plt.legend()
        plt.savefig(self.out_dir + "loss_curve.png")
        plt.close(fig3)
        fig4 = plt.figure(4)
        clear_output(wait=True)
        plt.plot(self.x, self.train_iou, label="train_iou")
        plt.plot(self.x, self.val_iou, label="val_iou")
        plt.title("IoU curve")
        plt.xlabel("Epoch")
        plt.ylabel("IoU")
        plt.legend()
        plt.savefig(self.out_dir + "mean_iou_curve.png")
        plt.close(fig4)
383
+
384
+
385
def step_decay(epoch, *, initial_lrate=0.1, drop=0.5, epochs_drop=10.0):
    """Step learning-rate schedule: multiply the rate by ``drop`` every
    ``epochs_drop`` epochs.

    Generalized from hard-coded constants; the defaults reproduce the
    original schedule exactly. The extra parameters are keyword-only so that
    Keras' ``LearningRateScheduler``, which may probe ``schedule(epoch, lr)``,
    cannot accidentally bind the current lr to ``initial_lrate``.

    Args:
        epoch: current epoch index (0-based).
        initial_lrate: learning rate at epoch 0.
        drop: multiplicative decay factor applied at each step.
        epochs_drop: number of epochs between decay steps.

    Returns:
        the learning rate to use for ``epoch``.
    """
    return initial_lrate * math.pow(drop, math.floor(epoch / epochs_drop))
391
+
392
+
393
# RGB palette indexed by class id (NYU-style 38-class + Unknown labelling).
label_colours = [
    (0, 0, 0),  # 0=background
    # 1=wall, 2=floor, 3=cabinet, 4=bed, 5=chair
    (128, 0, 0),
    (0, 128, 0),
    (128, 128, 0),
    (0, 0, 128),
    (128, 0, 128),
    # 6=sofa, 7=table, 8=door, 9=window, 10=bookshelf
    (0, 128, 128),
    (128, 128, 128),
    (255, 200, 180),
    (192, 0, 0),
    (192, 192, 192),
    # 11=picture, 12=counter, 13=blinds, 14=desk, 15=shelves
    (192, 128, 0),
    (64, 0, 128),
    (192, 0, 128),
    (255, 128, 0),
    (192, 128, 128),
    # 16=curtain, 17=dresser, 18=pillow, 19=mirror, 20=floor_mat, 21=clothes
    (0, 64, 0),
    (128, 64, 0),
    (0, 192, 0),
    (153, 153, 255),
    (0, 64, 128),
    (255, 255, 0),
    # 22=ceiling, 23=books, 24=fridge, 25=tv, 26=paper, 27=towel
    (250, 250, 250),
    (0, 192, 128),
    (250, 102, 250),
    (102, 250, 250),
    (44, 166, 44),
    (44, 44, 166),
    # 28=shower_curtain, 29=box, 30=whiteboard, 31=person, 32=night_stand, 33=toilet
    (166, 44, 44),
    (0, 250, 0),
    (250, 0, 0),
    (0, 0, 250),
    (206, 219, 156),
    (219, 156, 206),
    # 34=sink, 35=lamp, 36=bathtub, 37=bag, 38=Unknown
    (156, 206, 219),
    (23, 190, 207),
    (207, 23, 190),
    (190, 207, 23),
    (153, 0, 76),
]
# Alternative 2-class palette (0=background, 1=hand), kept for reference:
# label_colours = [(0, 0, 0), (128, 0, 0)]


def decode_labels(mask, num_classes=38):
    """Decode a batch of per-class score maps into one RGB colour image.

    Only the first image of the batch is decoded (original behaviour).

    Args:
        mask: array of shape (n, h, w, c) with per-class scores; a pixel is
            painted with class i's colour when mask[0, :, :, i] >= 0.5.
            (The original docstring documented a nonexistent ``num_images``
            argument; there is none.)
        num_classes: number of classes to decode (including background);
            must not exceed len(label_colours).

    Returns:
        uint8 RGB image of shape (h, w, 3). Where several classes exceed the
        0.5 threshold at the same pixel, the colours are summed with uint8
        wrap-around, matching the original implementation.
    """
    _, h, w, _ = mask.shape
    # per-channel accumulators; uint8 so repeated += wraps like the original
    R = np.zeros((h, w), dtype=np.uint8)
    G = np.zeros((h, w), dtype=np.uint8)
    B = np.zeros((h, w), dtype=np.uint8)

    for i in range(num_classes):
        # binary activation map for class i (threshold at 0.5)
        active = (mask[0, :, :, i] >= 0.5).astype(np.uint8)
        r, g, b = label_colours[i]
        R += (active * r).astype(np.uint8)
        G += (active * g).astype(np.uint8)
        B += (active * b).astype(np.uint8)

    # stack channels into the final (h, w, 3) image; the original rebuilt
    # this concatenation redundantly and pre-allocated unused buffers
    return np.stack((R, G, B), axis=2)