Spaces:

innat
/

HybridModel-GradCAM

Runtime error

App Files Files Community

innat committed on Jun 19, 2022

Commit

0f09377

1 Parent(s): f1deb8a

init

Browse files

Files changed (21) hide show

.gitignore +132 -0
README.md +9 -5
app.py +95 -0
config.py +30 -0
examples/daisy.jpg +0 -0
examples/dandelion.jpg +0 -0
examples/rose.jpg +0 -0
examples/sunflower.jpg +0 -0
examples/tulip.jpg +0 -0
layers/__init__.py +0 -0
layers/swin_blocks.py +139 -0
layers/window_attention.py +111 -0
models/__init__.py +1 -0
models/hybrid_model.py +170 -0
requirements.txt +7 -0
utils/__init__.py +0 -0
utils/drop_path.py +31 -0
utils/model_utils.py +46 -0
utils/patch.py +80 -0
utils/swin_window.py +25 -0
utils/viz_utils.py +64 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,132 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# Pycharm
+.idea/

README.md CHANGED Viewed

@@ -1,12 +1,16 @@
 ---
-title: HybridModel GradCAM
-emoji: ⚡
 colorFrom: purple
-colorTo: purple
 sdk: gradio
-sdk_version: 3.0.19
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Demo
+emoji: 🔥
 colorFrom: purple
+colorTo: yellow
 sdk: gradio
+sdk_version: 3.0.15
 app_file: app.py
 pinned: false
 ---
+## Visual Interpretation of a Hybrid Model
+Using a hybrid model built from *EfficientNet* and *Swin Transformer*, we inspect the visual interpretations of the CNN and Transformer blocks of the hybrid model (CNN + Swin Transformer) with the GradCAM technique. The results suggest that the transformer blocks are capable of globally refining feature activation across the relevant object, whereas the CNN is more focused on operating locally. However, the approach shown here is experimental, and the workflow could probably be refined into a more meaningful modeling approach. The model is trained on the [tf_flowers](https://www.tensorflow.org/datasets/catalog/tf_flowers) dataset, a multi-class classification problem.
+![Presentation2](./Presentation2.png)

app.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import os
+import gdown
+import gradio as gr
+import tensorflow as tf
+from config import Parameters
+from models.hybrid_model import GradientAccumulation
+from utils.model_utils import *
+from utils.viz_utils import make_gradcam_heatmap
+from utils.viz_utils import save_and_display_gradcam
+# Side length (in pixels) that every input image is resized to before inference.
+image_size = Parameters().image_size
+# Class names, looked up with the argmax index of the model's predictions.
+# NOTE(review): the order presumably matches the label encoding used at
+# training time - confirm against the training pipeline.
+str_labels = [
+    "daisy",
+    "dandelion",
+    "roses",
+    "sunflowers",
+    "tulips",
+]
+def get_model():
+    """Get the model."""
+    model = GradientAccumulation(
+        n_gradients=params.num_grad_accumulation, model_name="HybridModel"
+    )
+    _ = model(tf.ones((1, params.image_size, params.image_size, 3)))[0].shape
+    return model
+def get_model_weight(model_id):
+    """Get the trained weights."""
+    if not os.path.exists("model.h5"):
+        model_weight = gdown.download(id=model_id, quiet=False)
+    else:
+        model_weight = "model.h5"
+    return model_weight
+def load_model(model_id):
+    """Load trained model."""
+    weight = get_model_weight(model_id)
+    model = get_model()
+    model.load_weights(weight)
+    return model
+def image_process(image):
+    """Image preprocess for model input."""
+    image = tf.cast(image, dtype=tf.float32)
+    original_shape = image.shape
+    image = tf.image.resize(image, [image_size, image_size])
+    image = image[tf.newaxis, ...]
+    return image, original_shape
+def predict_fn(image):
+    """A predict function that will be invoked by gradio."""
+    loaded_model = load_model(model_id="1y6tseN0194T6d-4iIh5wo7RL9ttQERe0")
+    loaded_image, original_shape = image_process(image)
+    heatmap_a, heatmap_b, preds = make_gradcam_heatmap(loaded_image, loaded_model)
+    int_label = tf.argmax(preds, axis=-1).numpy()[0]
+    str_label = str_labels[int_label]
+    overaly_a = save_and_display_gradcam(
+        loaded_image[0], heatmap_a, image_shape=original_shape[:2]
+    )
+    overlay_b = save_and_display_gradcam(
+        loaded_image[0], heatmap_b, image_shape=original_shape[:2]
+    )
+    return [f"Predicted: {str_label}", overaly_a, overlay_b]
+iface = gr.Interface(
+    fn=predict_fn,
+    inputs=gr.inputs.Image(label="Input Image"),
+    outputs=[
+        gr.outputs.Label(label="Prediction"),
+        gr.inputs.Image(label="CNN GradCAM"),
+        gr.inputs.Image(label="Transformer GradCAM"),
+    ],
+    title="Hybrid EfficientNet Swin Transformer Demo",
+    description="The model is trained on tf_flowers dataset.",
+    examples=[
+        ["examples/dandelion.jpg"],
+        ["examples/sunflower.jpg"],
+        ["examples/tulip.jpg"],
+        ["examples/daisy.jpg"],
+        ["examples/rose.jpg"],
+    ],
+)
+iface.launch()

config.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import numpy as np
+import tensorflow as tf
+class Parameters:
+    """Static container for the data and optimisation settings of the demo.
+    All values are class attributes; the derived ones at the bottom are
+    computed once, when the class body is executed.
+    """
+    # data level
+    image_count = 3670  # presumably the number of images in the dataset - TODO confirm
+    image_size = 384  # side length of the square model input
+    batch_size = 12
+    num_grad_accumulation = 8
+    label_smooth = 0.05
+    class_number = 5
+    val_split = 0.2  # fraction of image_count counted as validation samples
+    autotune = tf.data.AUTOTUNE
+    # hparams
+    epochs = 10
+    lr_sched = "cosine_restart"
+    lr_base = 0.016
+    lr_min = 0
+    lr_decay_epoch = 2.4
+    lr_warmup_epoch = 5
+    lr_decay_factor = 0.97
+    # Learning rates rescaled by batch_size relative to a batch of 256.
+    scaled_lr = lr_base * (batch_size / 256.0)
+    scaled_lr_min = lr_min * (batch_size / 256.0)
+    # Sample counts derived from image_count and val_split.
+    num_validation_sample = int(image_count * val_split)
+    num_training_sample = image_count - num_validation_sample
+    # Batches per epoch (rounded up) and over all epochs.
+    train_step = int(np.ceil(num_training_sample / float(batch_size)))
+    total_steps = train_step * epochs

examples/daisy.jpg ADDED Viewed

examples/dandelion.jpg ADDED Viewed

examples/rose.jpg ADDED Viewed

examples/sunflower.jpg ADDED Viewed

examples/tulip.jpg ADDED Viewed

layers/__init__.py ADDED Viewed

File without changes

layers/swin_blocks.py ADDED Viewed

	@@ -0,0 +1,139 @@

+try:
+    from jax import numpy as jnp
+except ModuleNotFoundError:
+    # jax doesn't support windows os yet.
+    import numpy as jnp
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+from layers.window_attention import WindowAttention
+from utils.drop_path import DropPath
+from utils.swin_window import window_partition
+from utils.swin_window import window_reverse
+class SwinTransformer(layers.Layer):
+    def __init__(
+        self,
+        dim,
+        num_patch,
+        num_heads,
+        window_size=7,
+        shift_size=0,
+        num_mlp=1024,
+        qkv_bias=True,
+        dropout_rate=0.0,
+        **kwargs,
+    ):
+        super(SwinTransformer, self).__init__(**kwargs)
+        self.dim = dim
+        self.num_patch = num_patch
+        self.num_heads = num_heads
+        self.window_size = window_size
+        self.shift_size = shift_size
+        self.num_mlp = num_mlp
+        self.norm1 = layers.LayerNormalization(epsilon=1e-5)
+        self.attn = WindowAttention(
+            dim,
+            window_size=(self.window_size, self.window_size),
+            num_heads=num_heads,
+            qkv_bias=qkv_bias,
+            dropout_rate=dropout_rate,
+        )
+        self.drop_path = DropPath(dropout_rate) if dropout_rate > 0.0 else tf.identity
+        self.norm2 = layers.LayerNormalization(epsilon=1e-5)
+        self.mlp = keras.Sequential(
+            [
+                layers.Dense(num_mlp),
+                layers.Activation(keras.activations.gelu),
+                layers.Dropout(dropout_rate),
+                layers.Dense(dim),
+                layers.Dropout(dropout_rate),
+            ]
+        )
+        if min(self.num_patch) < self.window_size:
+            self.shift_size = 0
+            self.window_size = min(self.num_patch)
+    def build(self, input_shape):
+        if self.shift_size == 0:
+            self.attn_mask = None
+        else:
+            height, width = self.num_patch
+            h_slices = (
+                slice(0, -self.window_size),
+                slice(-self.window_size, -self.shift_size),
+                slice(-self.shift_size, None),
+            )
+            w_slices = (
+                slice(0, -self.window_size),
+                slice(-self.window_size, -self.shift_size),
+                slice(-self.shift_size, None),
+            )
+            mask_array = jnp.zeros((1, height, width, 1))
+            count = 0
+            for h in h_slices:
+                for w in w_slices:
+                    mask_array[:, h, w, :] = count
+                    count += 1
+            mask_array = tf.convert_to_tensor(mask_array)
+            # mask array to windows
+            mask_windows = window_partition(mask_array, self.window_size)
+            mask_windows = tf.reshape(
+                mask_windows, shape=[-1, self.window_size * self.window_size]
+            )
+            attn_mask = tf.expand_dims(mask_windows, axis=1) - tf.expand_dims(
+                mask_windows, axis=2
+            )
+            attn_mask = tf.where(attn_mask != 0, -100.0, attn_mask)
+            attn_mask = tf.where(attn_mask == 0, 0.0, attn_mask)
+            self.attn_mask = tf.Variable(initial_value=attn_mask, trainable=False)
+    def call(self, x):
+        height, width = self.num_patch
+        _, num_patches_before, channels = x.shape
+        x_skip = x
+        x = self.norm1(x)
+        x = tf.reshape(x, shape=(-1, height, width, channels))
+        if self.shift_size > 0:
+            shifted_x = tf.roll(
+                x, shift=[-self.shift_size, -self.shift_size], axis=[1, 2]
+            )
+        else:
+            shifted_x = x
+        x_windows = window_partition(shifted_x, self.window_size)
+        x_windows = tf.reshape(
+            x_windows, shape=(-1, self.window_size * self.window_size, channels)
+        )
+        attn_windows = self.attn(x_windows, mask=self.attn_mask)
+        attn_windows = tf.reshape(
+            attn_windows, shape=(-1, self.window_size, self.window_size, channels)
+        )
+        shifted_x = window_reverse(
+            attn_windows, self.window_size, height, width, channels
+        )
+        if self.shift_size > 0:
+            x = tf.roll(
+                shifted_x, shift=[self.shift_size, self.shift_size], axis=[1, 2]
+            )
+        else:
+            x = shifted_x
+        x = tf.reshape(x, shape=(-1, height * width, channels))
+        x = self.drop_path(x)
+        x = tf.cast(x_skip, dtype=tf.float32) + tf.cast(x, dtype=tf.float32)
+        x_skip = x
+        x = self.norm2(x)
+        x = self.mlp(x)
+        x = self.drop_path(x)
+        x = tf.cast(x_skip, dtype=tf.float32) + tf.cast(x, dtype=tf.float32)
+        return x

layers/window_attention.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import tensorflow as tf
+from tensorflow.keras import layers
+class WindowAttention(layers.Layer):
+    def __init__(
+        self,
+        dim,
+        window_size,
+        num_heads,
+        qkv_bias=True,
+        dropout_rate=0.0,
+        return_attention_scores=False,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.dim = dim
+        self.window_size = window_size
+        self.num_heads = num_heads
+        self.scale = (dim // num_heads) ** -0.5
+        self.return_attention_scores = return_attention_scores
+        self.qkv = layers.Dense(dim * 3, use_bias=qkv_bias)
+        self.dropout = layers.Dropout(dropout_rate)
+        self.proj = layers.Dense(dim)
+    def build(self, input_shape):
+        self.relative_position_bias_table = self.add_weight(
+            shape=(
+                (2 * self.window_size[0] - 1) * (2 * self.window_size[1] - 1),
+                self.num_heads,
+            ),
+            initializer="zeros",
+            trainable=True,
+            name="relative_position_bias_table",
+        )
+        self.relative_position_index = self.get_relative_position_index(
+            self.window_size[0], self.window_size[1]
+        )
+        super().build(input_shape)
+    def get_relative_position_index(self, window_height, window_width):
+        x_x, y_y = tf.meshgrid(range(window_height), range(window_width))
+        coords = tf.stack([y_y, x_x], axis=0)
+        coords_flatten = tf.reshape(coords, [2, -1])
+        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]
+        relative_coords = tf.transpose(relative_coords, perm=[1, 2, 0])
+        x_x = (relative_coords[:, :, 0] + window_height - 1) * (2 * window_width - 1)
+        y_y = relative_coords[:, :, 1] + window_width - 1
+        relative_coords = tf.stack([x_x, y_y], axis=-1)
+        return tf.reduce_sum(relative_coords, axis=-1)
+    def call(self, x, mask=None):
+        _, size, channels = x.shape
+        head_dim = channels // self.num_heads
+        x_qkv = self.qkv(x)
+        x_qkv = tf.reshape(x_qkv, shape=(-1, size, 3, self.num_heads, head_dim))
+        x_qkv = tf.transpose(x_qkv, perm=(2, 0, 3, 1, 4))
+        q, k, v = x_qkv[0], x_qkv[1], x_qkv[2]
+        q = q * self.scale
+        k = tf.transpose(k, perm=(0, 1, 3, 2))
+        attn = q @ k
+        relative_position_bias = tf.gather(
+            self.relative_position_bias_table,
+            self.relative_position_index,
+            axis=0,
+        )
+        relative_position_bias = tf.transpose(relative_position_bias, [2, 0, 1])
+        attn = attn + tf.expand_dims(relative_position_bias, axis=0)
+        if mask is not None:
+            nW = mask.get_shape()[0]
+            mask_float = tf.cast(
+                tf.expand_dims(tf.expand_dims(mask, axis=1), axis=0), tf.float32
+            )
+            attn = (
+                tf.reshape(attn, shape=(-1, nW, self.num_heads, size, size))
+                + mask_float
+            )
+            attn = tf.reshape(attn, shape=(-1, self.num_heads, size, size))
+            attn = tf.nn.softmax(attn, axis=-1)
+        else:
+            attn = tf.nn.softmax(attn, axis=-1)
+        attn = self.dropout(attn)
+        x_qkv = attn @ v
+        x_qkv = tf.transpose(x_qkv, perm=(0, 2, 1, 3))
+        x_qkv = tf.reshape(x_qkv, shape=(-1, size, channels))
+        x_qkv = self.proj(x_qkv)
+        x_qkv = self.dropout(x_qkv)
+        if self.return_attention_scores:
+            return x_qkv, attn
+        else:
+            return x_qkv
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "dim": self.dim,
+                "window_size": self.window_size,
+                "num_heads": self.num_heads,
+                "scale": self.scale,
+            }
+        )
+        return config

models/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

models/hybrid_model.py ADDED Viewed

	@@ -0,0 +1,170 @@

+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+from layers.swin_blocks import SwinTransformer
+from utils.model_utils import *
+from utils.patch import PatchEmbedding
+from utils.patch import PatchExtract
+from utils.patch import PatchMerging
+class HybridSwinTransformer(keras.Model):
+    """EfficientNetB0 backbone combined with Swin Transformer blocks.
+    An intermediate backbone activation is fed through the Swin blocks, the
+    final backbone output goes through a pooling head, and the two pooled
+    vectors are concatenated for classification. In inference mode the model
+    also returns the two feature maps used for GradCAM.
+    """
+    def __init__(self, model_name, **kwargs):
+        super().__init__(name=model_name, **kwargs)
+        # base models
+        base = keras.applications.EfficientNetB0(
+            include_top=False,
+            weights=None,
+            input_tensor=keras.Input((params.image_size, params.image_size, 3)),
+        )
+        # base model with compatible output which will be an input of transformer model
+        # Outputs: [intermediate activation for the Swin branch, final backbone output].
+        self.new_base = keras.Model(
+            [base.inputs],
+            [base.get_layer("block6a_expand_activation").output, base.output],
+            name="efficientnet",
+        )
+        # stuff of swin transformers
+        self.patch_extract = PatchExtract(patch_size)
+        self.patch_embedds = PatchEmbedding(num_patch_x * num_patch_y, embed_dim)
+        self.patch_merging = PatchMerging(
+            (num_patch_x, num_patch_y), embed_dim=embed_dim
+        )
+        # swin blocks containers
+        # One block is added per value in range(shift_size); block i is created
+        # with shift_size=i.
+        self.swin_sequences = keras.Sequential(name="swin_blocks")
+        for i in range(shift_size):
+            self.swin_sequences.add(
+                SwinTransformer(
+                    dim=embed_dim,
+                    num_patch=(num_patch_x, num_patch_y),
+                    num_heads=num_heads,
+                    window_size=window_size,
+                    shift_size=i,
+                    num_mlp=num_mlp,
+                    qkv_bias=qkv_bias,
+                    dropout_rate=dropout_rate,
+                )
+            )
+        # swin block's head
+        self.swin_head = keras.Sequential(
+            [
+                layers.GlobalAveragePooling1D(),
+                layers.AlphaDropout(0.5),
+                layers.BatchNormalization(),
+            ],
+            name="swin_head",
+        )
+        # base model's (cnn model) head
+        self.conv_head = keras.Sequential(
+            [
+                layers.GlobalAveragePooling2D(),
+                layers.AlphaDropout(0.5),
+            ],
+            name="conv_head",
+        )
+        # classifier
+        # No activation: the layer outputs raw scores in float32.
+        self.classifier = layers.Dense(
+            params.class_number, activation=None, dtype="float32"
+        )
+        # Trace the model once on a symbolic input; the returned model is discarded.
+        self.build_graph()
+    def call(self, inputs, training=None, **kwargs):
+        """Run the model.
+        Returns ``preds`` alone when ``training`` is truthy, otherwise the tuple
+        ``(preds, base_gcam_top, swin_gcam_top)``.
+        """
+        # x: intermediate backbone activation; base_gcam_top: final backbone output.
+        x, base_gcam_top = self.new_base(inputs)
+        x = self.patch_extract(x)
+        x = self.patch_embedds(x)
+        x = self.swin_sequences(tf.cast(x, dtype=tf.float32))
+        # patch_merging returns the merged tokens and the un-merged feature maps.
+        x, swin_gcam_top = self.patch_merging(x)
+        swin_top = self.swin_head(x)
+        conv_top = self.conv_head(base_gcam_top)
+        preds = self.classifier(tf.concat([swin_top, conv_top], axis=-1))
+        if training:  # training phase
+            return preds
+        else:  # inference phase
+            return preds, base_gcam_top, swin_gcam_top
+    def build_graph(self):
+        """Return a functional ``keras.Model`` wrapping ``call`` on a symbolic input."""
+        x = keras.Input(shape=(params.image_size, params.image_size, 3))
+        return keras.Model(inputs=[x], outputs=self.call(x))
+class GradientAccumulation(HybridSwinTransformer):
+    """ref: https://gist.github.com/innat/ba6740293e7b7b227829790686f2119c"""
+    def __init__(self, n_gradients, **kwargs):
+        super().__init__(**kwargs)
+        self.n_gradients = tf.constant(n_gradients, dtype=tf.int32)
+        self.n_acum_step = tf.Variable(0, dtype=tf.int32, trainable=False)
+        self.gradient_accumulation = [
+            tf.Variable(tf.zeros_like(v, dtype=tf.float32), trainable=False)
+            for v in self.trainable_variables
+        ]
+    def train_step(self, data):
+        # track accumulation step update
+        self.n_acum_step.assign_add(1)
+        # Unpack the data. Its structure depends on your model and
+        # on what you pass to `fit()`.
+        x, y = data
+        with tf.GradientTape() as tape:
+            y_pred = self(x, training=True)  # Forward pass
+            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
+        # Calculate batch gradients
+        gradients = tape.gradient(loss, self.trainable_variables)
+        # Accumulate batch gradients
+        for i in range(len(self.gradient_accumulation)):
+            self.gradient_accumulation[i].assign_add(gradients[i])
+        # If n_acum_step reach the n_gradients then we apply accumulated gradients to -
+        # update the variables otherwise do nothing
+        tf.cond(
+            tf.equal(self.n_acum_step, self.n_gradients),
+            self.apply_accu_gradients,
+            lambda: None,
+        )
+        # Return a dict mapping metric names to current value.
+        # Note that it will include the loss (tracked in self.metrics).
+        self.compiled_metrics.update_state(y, y_pred)
+        return {m.name: m.result() for m in self.metrics}
+    def apply_accu_gradients(self):
+        # Update weights
+        self.optimizer.apply_gradients(
+            zip(self.gradient_accumulation, self.trainable_variables)
+        )
+        # reset accumulation step
+        self.n_acum_step.assign(0)
+        for i in range(len(self.gradient_accumulation)):
+            self.gradient_accumulation[i].assign(
+                tf.zeros_like(self.trainable_variables[i], dtype=tf.float32)
+            )
+    def test_step(self, data):
+        # Unpack the data
+        x, y = data
+        # Compute predictions
+        y_pred, base_gcam_top, swin_gcam_top = self(x, training=False)
+        # Updates the metrics tracking the loss
+        self.compiled_loss(y, y_pred, regularization_losses=self.losses)
+        # Update the metrics.
+        self.compiled_metrics.update_state(y, y_pred)
+        # Return a dict mapping metric names to current value.
+        # Note that it will include the loss (tracked in self.metrics).
+        return {m.name: m.result() for m in self.metrics}

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+tensorflow==2.6.4
+jax==0.3.13
+jaxlib
+numpy
+matplotlib==3.5.2
+gradio==3.0.15
+gdown==4.4.0

utils/__init__.py ADDED Viewed

File without changes

utils/drop_path.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import tensorflow as tf
+from tensorflow.keras import backend
+from tensorflow.keras import layers
+class DropPath(layers.Layer):
+    def __init__(self, drop_prob=None, **kwargs):
+        super(DropPath, self).__init__(**kwargs)
+        self.drop_prob = drop_prob
+    def call(self, inputs, training=None):
+        if self.drop_prob == 0.0 or not training:
+            return inputs
+        else:
+            batch_size = tf.shape(inputs)[0]
+            keep_prob = 1 - self.drop_prob
+            path_mask_shape = (batch_size,) + (1,) * (len(tf.shape(inputs)) - 1)
+            path_mask = tf.floor(backend.random_bernoulli(path_mask_shape, p=keep_prob))
+            outputs = (
+                tf.math.divide(tf.cast(inputs, dtype=tf.float32), keep_prob) * path_mask
+            )
+            return outputs
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "drop_prob": self.drop_prob,
+            }
+        )
+        return config

utils/model_utils.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import numpy as np
+import tensorflow as tf
+class Parameters:
+    """Static container for the data and optimisation settings used by the model code.
+    All values are class attributes; the derived ones at the bottom are
+    computed once, when the class body is executed.
+    """
+    # data level
+    image_count = 3670  # presumably the number of images in the dataset - TODO confirm
+    image_size = 384  # side length of the square model input
+    batch_size = 12
+    num_grad_accumulation = 8
+    class_number = 5
+    val_split = 0.2  # fraction of image_count counted as validation samples
+    autotune = tf.data.AUTOTUNE
+    # hparams
+    epochs = 10
+    lr_sched = "cosine_restart"
+    lr_base = 0.016
+    lr_min = 0
+    lr_decay_epoch = 2.4
+    lr_warmup_epoch = 5
+    lr_decay_factor = 0.97
+    # Learning rates rescaled by batch_size relative to a batch of 256.
+    scaled_lr = lr_base * (batch_size / 256.0)
+    scaled_lr_min = lr_min * (batch_size / 256.0)
+    # Sample counts derived from image_count and val_split.
+    num_validation_sample = int(image_count * val_split)
+    num_training_sample = image_count - num_validation_sample
+    # Batches per epoch (rounded up) and over all epochs.
+    train_step = int(np.ceil(num_training_sample / float(batch_size)))
+    total_steps = train_step * epochs
+# Shared settings instance and Swin hyper-parameters, exposed at module level.
+params = Parameters()
+patch_size = (2, 2)  # 2-by-2 sized patches
+dropout_rate = 0.5  # Dropout rate
+num_heads = 8  # Attention heads
+embed_dim = 64  # Embedding dimension
+num_mlp = 128  # MLP layer size
+qkv_bias = True  # Convert embedded patches to query, key, and values with a learnable additive value
+window_size = 2  # Size of attention window
+shift_size = 1  # Size of shifting window
+# NOTE(review): presumably the spatial size of the feature map handed to the
+# transformer branch - confirm against the backbone output.
+image_dimension = 24  # Initial image size / Input size of the transformer model
+# Number of patches along each axis of that input.
+num_patch_x = image_dimension // patch_size[0]
+num_patch_y = image_dimension // patch_size[1]

utils/patch.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import tensorflow as tf
+from tensorflow.keras import layers
+class PatchExtract(layers.Layer):
+    def __init__(self, patch_size, **kwargs):
+        super().__init__(**kwargs)
+        self.patch_size_x = patch_size[0]
+        self.patch_size_y = patch_size[0]
+    def call(self, images):
+        batch_size = tf.shape(images)[0]
+        patches = tf.image.extract_patches(
+            images=images,
+            sizes=(1, self.patch_size_x, self.patch_size_y, 1),
+            strides=(1, self.patch_size_x, self.patch_size_y, 1),
+            rates=(1, 1, 1, 1),
+            padding="VALID",
+        )
+        patch_dim = patches.shape[-1]
+        patch_num = patches.shape[1]
+        return tf.reshape(patches, (batch_size, patch_num * patch_num, patch_dim))
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "patch_size_y": self.patch_size_y,
+                "patch_size_x": self.patch_size_x,
+            }
+        )
+        return config
+class PatchEmbedding(layers.Layer):
+    def __init__(self, num_patch, embed_dim, **kwargs):
+        super().__init__(**kwargs)
+        self.num_patch = num_patch
+        self.proj = layers.Dense(embed_dim)
+        self.pos_embed = layers.Embedding(input_dim=num_patch, output_dim=embed_dim)
+    def call(self, patch):
+        pos = tf.range(start=0, limit=self.num_patch, delta=1)
+        return self.proj(patch) + self.pos_embed(pos)
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "num_patch": self.num_patch,
+            }
+        )
+        return config
+class PatchMerging(layers.Layer):
+    def __init__(self, num_patch, embed_dim):
+        super().__init__()
+        self.num_patch = num_patch
+        self.embed_dim = embed_dim
+        self.linear_trans = layers.Dense(2 * embed_dim, use_bias=False)
+    def call(self, x):
+        height, width = self.num_patch
+        _, _, C = x.get_shape().as_list()
+        x = tf.reshape(x, shape=(-1, height, width, C))
+        feat_maps = x
+        x0 = x[:, 0::2, 0::2, :]
+        x1 = x[:, 1::2, 0::2, :]
+        x2 = x[:, 0::2, 1::2, :]
+        x3 = x[:, 1::2, 1::2, :]
+        x = tf.concat((x0, x1, x2, x3), axis=-1)
+        x = tf.reshape(x, shape=(-1, (height // 2) * (width // 2), 4 * C))
+        return self.linear_trans(x), feat_maps
+    def get_config(self):
+        config = super().get_config()
+        config.update({"num_patch": self.num_patch, "embed_dim": self.embed_dim})
+        return config

utils/swin_window.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import tensorflow as tf
+def window_partition(x, window_size):
+    _, height, width, channels = x.shape
+    patch_num_y = height // window_size
+    patch_num_x = width // window_size
+    x = tf.reshape(
+        x, shape=(-1, patch_num_y, window_size, patch_num_x, window_size, channels)
+    )
+    x = tf.transpose(x, (0, 1, 3, 2, 4, 5))
+    windows = tf.reshape(x, shape=(-1, window_size, window_size, channels))
+    return windows
+def window_reverse(windows, window_size, height, width, channels):
+    patch_num_y = height // window_size
+    patch_num_x = width // window_size
+    x = tf.reshape(
+        windows,
+        shape=(-1, patch_num_y, patch_num_x, window_size, window_size, channels),
+    )
+    x = tf.transpose(x, perm=(0, 1, 3, 2, 4, 5))
+    x = tf.reshape(x, shape=(-1, height, width, channels))
+    return x

utils/viz_utils.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import matplotlib.cm as cm
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+def make_gradcam_heatmap(img_array, grad_model, pred_index=None):
+    with tf.GradientTape(persistent=True) as tape:
+        preds, base_top, swin_top = grad_model(img_array)
+        if pred_index is None:
+            pred_index = tf.argmax(preds[0])
+        class_channel = preds[:, pred_index]
+    grads = tape.gradient(class_channel, base_top)
+    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
+    base_top = base_top[0]
+    heatmap_a = base_top @ pooled_grads[..., tf.newaxis]
+    heatmap_a = tf.squeeze(heatmap_a)
+    heatmap_a = tf.maximum(heatmap_a, 0) / tf.math.reduce_max(heatmap_a)
+    heatmap_a = heatmap_a.numpy()
+    grads = tape.gradient(class_channel, swin_top)
+    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
+    swin_top = swin_top[0]
+    heatmap_b = swin_top @ pooled_grads[..., tf.newaxis]
+    heatmap_b = tf.squeeze(heatmap_b)
+    heatmap_b = tf.maximum(heatmap_b, 0) / tf.math.reduce_max(heatmap_b)
+    heatmap_b = heatmap_b.numpy()
+    return heatmap_a, heatmap_b, preds
+def save_and_display_gradcam(
+    img,
+    heatmap,
+    target=None,
+    pred=None,
+    cam_path="cam.jpg",
+    cmap="jet",  # inferno, viridis
+    alpha=0.6,
+    plot=None,
+    image_shape=None,
+):
+    # Rescale heatmap to a range 0-255
+    heatmap = np.uint8(255 * heatmap)
+    # Use jet colormap to colorize heatmap
+    jet = cm.get_cmap(cmap)
+    # Use RGB values of the colormap
+    jet_colors = jet(np.arange(256))[:, :3]
+    jet_heatmap = jet_colors[heatmap]
+    # Create an image with RGB colorized heatmap
+    jet_heatmap = keras.utils.array_to_img(jet_heatmap)
+    jet_heatmap = jet_heatmap.resize((img.shape[0], img.shape[1]))
+    jet_heatmap = keras.utils.img_to_array(jet_heatmap)
+    # Superimpose the heatmap on original image
+    superimposed_img = img + jet_heatmap * alpha
+    superimposed_img = keras.utils.array_to_img(superimposed_img)
+    size_w, size_h = image_shape[:2]
+    superimposed_img = superimposed_img.resize((size_h, size_w))
+    return superimposed_img