Spaces:

awsaf49
/

gcvit-tf

Running

App Files Files Community

awsaf49 committed on Aug 12, 2022

Commit

4a0cabe

•

1 Parent(s): 69a7cee

latest version

Browse files

Files changed (27) hide show

.gitattributes +31 -31
README.md +13 -13
app.py +32 -32
example/Standing_jaguar.jpg +0 -0
gcvit/__init__.py +1 -1
gcvit/__pycache__/__init__.cpython-38.pyc +0 -0
gcvit/layers/__init__.py +7 -7
gcvit/layers/__pycache__/__init__.cpython-38.pyc +0 -0
gcvit/layers/__pycache__/attention.cpython-38.pyc +0 -0
gcvit/layers/__pycache__/block.cpython-38.pyc +0 -0
gcvit/layers/__pycache__/drop.cpython-38.pyc +0 -0
gcvit/layers/__pycache__/embedding.cpython-38.pyc +0 -0
gcvit/layers/__pycache__/feature.cpython-38.pyc +0 -0
gcvit/layers/__pycache__/level.cpython-38.pyc +0 -0
gcvit/layers/__pycache__/window.cpython-38.pyc +0 -0
gcvit/layers/block.py +98 -98
gcvit/layers/embedding.py +1 -1
gcvit/layers/feature.py +254 -201
gcvit/layers/level.py +84 -92
gcvit/models/__init__.py +1 -1
gcvit/models/__pycache__/__init__.cpython-38.pyc +0 -0
gcvit/models/__pycache__/gcvit.cpython-38.pyc +0 -0
gcvit/models/gcvit.py +180 -145
gcvit/utils/gradcam.py +68 -68
gcvit/version.py +1 -1
requirements.txt +4 -4
setup.py +49 -49

.gitattributes CHANGED Viewed

@@ -1,31 +1,31 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zstandard filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zstandard filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,13 +1,13 @@
----
-title: Gcvit Tf
-emoji: 📈
-colorFrom: yellow
-colorTo: purple
-sdk: gradio
-sdk_version: 3.1.0
-app_file: app.py
-pinned: false
-license: mit
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Gcvit Tf
+emoji: 📈
+colorFrom: yellow
+colorTo: purple
+sdk: gradio
+sdk_version: 3.1.0
+app_file: app.py
+pinned: false
+license: mit
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,33 +1,33 @@
-import tensorflow as tf
-import gradio as gr
-import gcvit
-from gcvit.utils import get_gradcam_model, get_gradcam_prediction
-def predict_fn(image, model_name):
-    """A predict function that will be invoked by gradio."""
-    model = getattr(gcvit, model_name)(pretrain=True)
-    gradcam_model = get_gradcam_model(model)
-    preds, overlay = get_gradcam_prediction(image, gradcam_model, cmap='jet', alpha=0.4, pred_index=None)
-    preds = {x[1]:float(x[2]) for x in preds}
-    return [preds, overlay]
-demo = gr.Interface(
-    fn=predict_fn,
-    inputs=[
-        gr.inputs.Image(label="Input Image"),
-        gr.Radio(['GCViTTiny', 'GCViTSmall', 'GCViTBase'], value='GCViTTiny', label='Model Name')
-        ],
-    outputs=[
-        gr.outputs.Label(label="Prediction"),
-        gr.inputs.Image(label="GradCAM"),
-    ],
-    title="Global Context Vision Transformer (GCViT) Demo",
-    description="Image Classification with GCViT Model using ImageNet Pretrain Weights.",
-    examples=[
-        ["example/hot_air_ballon.jpg", 'GCViTTiny'],
-        ["example/chelsea.png", 'GCViTTiny'],
-        ["example/penguin.JPG", 'GCViTTiny'],
-        ["example/bus.jpg", 'GCViTTiny'],
-    ],
-)
 demo.launch()

+import tensorflow as tf
+import gradio as gr
+import gcvit
+from gcvit.utils import get_gradcam_model, get_gradcam_prediction
+def predict_fn(image, model_name):
+    """A predict function that will be invoked by gradio."""
+    model = getattr(gcvit, model_name)(pretrain=True)
+    gradcam_model = get_gradcam_model(model)
+    preds, overlay = get_gradcam_prediction(image, gradcam_model, cmap='jet', alpha=0.4, pred_index=None)
+    preds = {x[1]:float(x[2]) for x in preds}
+    return [preds, overlay]
+demo = gr.Interface(
+    fn=predict_fn,
+    inputs=[
+        gr.inputs.Image(label="Input Image"),
+        gr.Radio(['GCViTTiny', 'GCViTSmall', 'GCViTBase'], value='GCViTTiny', label='Model Name')
+        ],
+    outputs=[
+        gr.outputs.Label(label="Prediction"),
+        gr.inputs.Image(label="GradCAM"),
+    ],
+    title="Global Context Vision Transformer (GCViT) Demo",
+    description="Image Classification with GCViT Model using ImageNet Pretrain Weights.",
+    examples=[
+        ["example/hot_air_ballon.jpg", 'GCViTTiny'],
+        ["example/chelsea.png", 'GCViTTiny'],
+        ["example/penguin.JPG", 'GCViTTiny'],
+        ["example/bus.jpg", 'GCViTTiny'],
+    ],
+)
 demo.launch()

example/Standing_jaguar.jpg ADDED Viewed

gcvit/__init__.py CHANGED Viewed

	@@ -1,2 +1,2 @@
1	- from .models import GCViT, GCViTTiny, GCViTSmall, GCViTBase
2	from .version import __version__


1	+ from .models import GCViT, GCViTXXTiny, GCViTXTiny, GCViTTiny, GCViTSmall, GCViTBase
2	from .version import __version__

gcvit/__pycache__/__init__.cpython-38.pyc DELETED Viewed

Binary file (228 Bytes)

gcvit/layers/__init__.py CHANGED Viewed

@@ -1,7 +1,7 @@
-from .window import window_partition, window_reverse
-from .attention import WindowAttention
-from .drop import DropPath, Identity
-from .embedding import PatchEmbed
-from .feature import Mlp, FeatExtract, ReduceSize, SE, Resizing
-from .block import GCViTBlock
-from .level import GCViTLayer

+from .window import window_partition, window_reverse
+from .attention import WindowAttention
+from .drop import DropPath, Identity
+from .embedding import Stem
+from .feature import Mlp, FeatExtract, ReduceSize, SE, Resizing
+from .block import GCViTBlock
+from .level import GCViTLevel

gcvit/layers/__pycache__/__init__.cpython-38.pyc DELETED Viewed

Binary file (530 Bytes)

gcvit/layers/__pycache__/attention.cpython-38.pyc DELETED Viewed

Binary file (3.58 kB)

gcvit/layers/__pycache__/block.cpython-38.pyc DELETED Viewed

Binary file (3 kB)

gcvit/layers/__pycache__/drop.cpython-38.pyc DELETED Viewed

Binary file (1.8 kB)

gcvit/layers/__pycache__/embedding.cpython-38.pyc DELETED Viewed

Binary file (1.39 kB)

gcvit/layers/__pycache__/feature.cpython-38.pyc DELETED Viewed

Binary file (5.5 kB)

gcvit/layers/__pycache__/level.cpython-38.pyc DELETED Viewed

Binary file (3 kB)

gcvit/layers/__pycache__/window.cpython-38.pyc DELETED Viewed

Binary file (801 Bytes)

gcvit/layers/block.py CHANGED Viewed

@@ -1,99 +1,99 @@
-import tensorflow as tf
-from .attention import WindowAttention
-from .drop import DropPath
-from .window import window_partition, window_reverse
-from .feature import Mlp, FeatExtract
-@tf.keras.utils.register_keras_serializable(package="gcvit")
-class GCViTBlock(tf.keras.layers.Layer):
-    def __init__(self, window_size, num_heads, global_query, mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0.,
-                 attn_drop=0., path_drop=0., act_layer='gelu', layer_scale=None, **kwargs):
-        super().__init__(**kwargs)
-        self.window_size = window_size
-        self.num_heads = num_heads
-        self.global_query = global_query
-        self.mlp_ratio = mlp_ratio
-        self.qkv_bias = qkv_bias
-        self.qk_scale = qk_scale
-        self.drop = drop
-        self.attn_drop = attn_drop
-        self.path_drop = path_drop
-        self.act_layer = act_layer
-        self.layer_scale = layer_scale
-    def build(self, input_shape):
-        B, H, W, C = input_shape[0]
-        self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm1')
-        self.attn = WindowAttention(window_size=self.window_size,
-                                   num_heads=self.num_heads,
-                                   global_query=self.global_query,
-                                   qkv_bias=self.qkv_bias,
-                                   qk_scale=self.qk_scale,
-                                   attn_dropout=self.attn_drop,
-                                   proj_dropout=self.drop,
-                                   name='attn')
-        self.drop_path1 = DropPath(self.path_drop)
-        self.drop_path2 = DropPath(self.path_drop)
-        self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm2')
-        self.mlp = Mlp(hidden_features=int(C * self.mlp_ratio), dropout=self.drop, act_layer=self.act_layer, name='mlp')
-        if self.layer_scale is not None:
-            self.gamma1 = self.add_weight(
-                'gamma1',
-                shape=[C],
-                initializer=tf.keras.initializers.Constant(self.layer_scale),
-                trainable=True,
-                dtype=self.dtype)
-            self.gamma2 = self.add_weight(
-                'gamma2',
-                shape=[C],
-                initializer=tf.keras.initializers.Constant(self.layer_scale),
-                trainable=True,
-                dtype=self.dtype)
-        else:
-            self.gamma1 = 1.0
-            self.gamma2 = 1.0
-        self.num_windows = int(H // self.window_size) * int(W // self.window_size)
-        super().build(input_shape)
-    def call(self, inputs, **kwargs):
-        if self.global_query:
-            inputs, q_global = inputs
-        else:
-            inputs = inputs[0]
-        B, H, W, C = tf.unstack(tf.shape(inputs), num=4)
-        x = self.norm1(inputs)
-        # create windows and concat them in batch axis
-        x = window_partition(x, self.window_size)  # (B_, win_h, win_w, C)
-        # flatten patch
-        x = tf.reshape(x, shape=[-1, self.window_size * self.window_size, C])  # (B_, N, C) => (batch*num_win, num_token, feature)
-        # attention
-        if self.global_query:
-            x = self.attn([x, q_global])
-        else:
-            x = self.attn([x])
-        # reverse window partition
-        x = window_reverse(x, self.window_size, H, W, C)
-        # FFN
-        x = inputs + self.drop_path1(x * self.gamma1)
-        x = x + self.drop_path2(self.gamma2 * self.mlp(self.norm2(x)))
-        return x
-    def get_config(self):
-        config = super().get_config()
-        config.update({
-            'window_size': self.window_size,
-            'num_heads': self.num_heads,
-            'global_query': self.global_query,
-            'mlp_ratio': self.mlp_ratio,
-            'qkv_bias': self.qkv_bias,
-            'qk_scale': self.qk_scale,
-            'drop': self.drop,
-            'attn_drop': self.attn_drop,
-            'path_drop': self.path_drop,
-            'act_layer': self.act_layer,
-            'layer_scale': self.layer_scale,
-            'num_windows': self.num_windows,
-        })
         return config

+import tensorflow as tf
+from .attention import WindowAttention
+from .drop import DropPath
+from .window import window_partition, window_reverse
+from .feature import Mlp, FeatExtract
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class GCViTBlock(tf.keras.layers.Layer):
+    def __init__(self, window_size, num_heads, global_query, mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0.,
+                 attn_drop=0., path_drop=0., act_layer='gelu', layer_scale=None, **kwargs):
+        super().__init__(**kwargs)
+        self.window_size = window_size
+        self.num_heads = num_heads
+        self.global_query = global_query
+        self.mlp_ratio = mlp_ratio
+        self.qkv_bias = qkv_bias
+        self.qk_scale = qk_scale
+        self.drop = drop
+        self.attn_drop = attn_drop
+        self.path_drop = path_drop
+        self.act_layer = act_layer
+        self.layer_scale = layer_scale
+    def build(self, input_shape):
+        B, H, W, C = input_shape[0]
+        self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm1')
+        self.attn = WindowAttention(window_size=self.window_size,
+                                   num_heads=self.num_heads,
+                                   global_query=self.global_query,
+                                   qkv_bias=self.qkv_bias,
+                                   qk_scale=self.qk_scale,
+                                   attn_dropout=self.attn_drop,
+                                   proj_dropout=self.drop,
+                                   name='attn')
+        self.drop_path1 = DropPath(self.path_drop)
+        self.drop_path2 = DropPath(self.path_drop)
+        self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm2')
+        self.mlp = Mlp(hidden_features=int(C * self.mlp_ratio), dropout=self.drop, act_layer=self.act_layer, name='mlp')
+        if self.layer_scale is not None:
+            self.gamma1 = self.add_weight(
+                'gamma1',
+                shape=[C],
+                initializer=tf.keras.initializers.Constant(self.layer_scale),
+                trainable=True,
+                dtype=self.dtype)
+            self.gamma2 = self.add_weight(
+                'gamma2',
+                shape=[C],
+                initializer=tf.keras.initializers.Constant(self.layer_scale),
+                trainable=True,
+                dtype=self.dtype)
+        else:
+            self.gamma1 = 1.0
+            self.gamma2 = 1.0
+        self.num_windows = int(H // self.window_size) * int(W // self.window_size)
+        super().build(input_shape)
+    def call(self, inputs, **kwargs):
+        if self.global_query:
+            inputs, q_global = inputs
+        else:
+            inputs = inputs[0]
+        B, H, W, C = tf.unstack(tf.shape(inputs), num=4)
+        x = self.norm1(inputs)
+        # create windows and concat them in batch axis
+        x = window_partition(x, self.window_size)  # (B_, win_h, win_w, C)
+        # flatten patch
+        x = tf.reshape(x, shape=[-1, self.window_size * self.window_size, C])  # (B_, N, C) => (batch*num_win, num_token, feature)
+        # attention
+        if self.global_query:
+            x = self.attn([x, q_global])
+        else:
+            x = self.attn([x])
+        # reverse window partition
+        x = window_reverse(x, self.window_size, H, W, C)
+        # FFN
+        x = inputs + self.drop_path1(x * self.gamma1)
+        x = x + self.drop_path2(self.gamma2 * self.mlp(self.norm2(x)))
+        return x
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            'window_size': self.window_size,
+            'num_heads': self.num_heads,
+            'global_query': self.global_query,
+            'mlp_ratio': self.mlp_ratio,
+            'qkv_bias': self.qkv_bias,
+            'qk_scale': self.qk_scale,
+            'drop': self.drop,
+            'attn_drop': self.attn_drop,
+            'path_drop': self.path_drop,
+            'act_layer': self.act_layer,
+            'layer_scale': self.layer_scale,
+            'num_windows': self.num_windows,
+        })
         return config

gcvit/layers/embedding.py CHANGED Viewed

@@ -4,7 +4,7 @@ from .feature import ReduceSize
 @tf.keras.utils.register_keras_serializable(package="gcvit")
-class PatchEmbed(tf.keras.layers.Layer):
     def __init__(self, dim, **kwargs):
         super().__init__(**kwargs)
         self.dim = dim

 @tf.keras.utils.register_keras_serializable(package="gcvit")
+class Stem(tf.keras.layers.Layer):
     def __init__(self, dim, **kwargs):
         super().__init__(**kwargs)
         self.dim = dim

gcvit/layers/feature.py CHANGED Viewed

@@ -1,202 +1,255 @@
-import tensorflow as tf
-import tensorflow_addons as tfa
-H_AXIS = -3
-W_AXIS = -2
-@tf.keras.utils.register_keras_serializable(package="gcvit")
-class Mlp(tf.keras.layers.Layer):
-    def __init__(self, hidden_features=None, out_features=None, act_layer='gelu', dropout=0., **kwargs):
-        super().__init__(**kwargs)
-        self.hidden_features = hidden_features
-        self.out_features = out_features
-        self.act_layer = act_layer
-        self.dropout = dropout
-    def build(self, input_shape):
-        self.in_features = input_shape[-1]
-        self.hidden_features = self.hidden_features or self.in_features
-        self.out_features = self.out_features or self.in_features
-        self.fc1 = tf.keras.layers.Dense(self.hidden_features, name="fc1")
-        self.act = tf.keras.layers.Activation(self.act_layer, name="act")
-        self.fc2 = tf.keras.layers.Dense(self.out_features, name="fc2")
-        self.drop1 = tf.keras.layers.Dropout(self.dropout, name="drop1")
-        self.drop2 = tf.keras.layers.Dropout(self.dropout, name="drop2")
-        super().build(input_shape)
-    def call(self, inputs, **kwargs):
-        x = self.fc1(inputs)
-        x = self.act(x)
-        x = self.drop1(x)
-        x = self.fc2(x)
-        x = self.drop2(x)
-        return x
-    def get_config(self):
-        config = super().get_config()
-        config.update({
-            "hidden_features":self.hidden_features,
-            "out_features":self.out_features,
-            "act_layer":self.act_layer,
-            "dropout":self.dropout
-            })
-        return config
-@tf.keras.utils.register_keras_serializable(package="gcvit")
-class SE(tf.keras.layers.Layer):
-    def __init__(self, oup=None, expansion=0.25, **kwargs):
-        super().__init__(**kwargs)
-        self.expansion = expansion
-        self.oup = oup
-    def build(self, input_shape):
-        inp = input_shape[-1]
-        self.oup = self.oup or inp
-        self.avg_pool = tfa.layers.AdaptiveAveragePooling2D(1, name="avg_pool")
-        self.fc = [
-            tf.keras.layers.Dense(int(inp * self.expansion), use_bias=False, name='fc/0'),
-            tf.keras.layers.Activation('gelu', name='fc/1'),
-            tf.keras.layers.Dense(self.oup, use_bias=False, name='fc/2'),
-            tf.keras.layers.Activation('sigmoid', name='fc/3')
-            ]
-        super().build(input_shape)
-    def call(self, inputs, **kwargs):
-        b, _, _, c = tf.unstack(tf.shape(inputs), num=4)
-        x = tf.reshape(self.avg_pool(inputs), (b, c))
-        for layer in self.fc:
-            x = layer(x)
-        x = tf.reshape(x, (b, 1, 1, c))
-        return x*inputs
-    def get_config(self):
-        config = super().get_config()
-        config.update({
-            'expansion': self.expansion,
-            'oup': self.oup,
-            })
-        return config
-@tf.keras.utils.register_keras_serializable(package="gcvit")
-class ReduceSize(tf.keras.layers.Layer):
-    def __init__(self, keep_dim=False, **kwargs):
-        super().__init__(**kwargs)
-        self.keep_dim = keep_dim
-    def build(self, input_shape):
-        dim = input_shape[-1]
-        dim_out = dim if self.keep_dim else 2*dim
-        self.pad1 = tf.keras.layers.ZeroPadding2D(1, name='pad1')
-        self.pad2 = tf.keras.layers.ZeroPadding2D(1, name='pad2')
-        self.conv = [
-            tf.keras.layers.DepthwiseConv2D(kernel_size=3, strides=1, padding='valid', use_bias=False, name='conv/0'),
-            tf.keras.layers.Activation('gelu', name='conv/1'),
-            SE(name='conv/2'),
-            tf.keras.layers.Conv2D(dim, kernel_size=1, strides=1, padding='valid', use_bias=False, name='conv/3')
-        ]
-        self.reduction = tf.keras.layers.Conv2D(dim_out, kernel_size=3, strides=2, padding='valid', use_bias=False,
-                                                name='reduction')
-        self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm1')  # eps like PyTorch
-        self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm2')
-        super().build(input_shape)
-    def call(self, inputs, **kwargs):
-        x = self.norm1(inputs)
-        xr = self.pad1(x)  # if pad had weights it would've thrown error with .save_weights()
-        for layer in self.conv:
-            xr = layer(xr)
-        x = x + xr
-        x = self.pad2(x)
-        x = self.reduction(x)
-        x = self.norm2(x)
-        return x
-    def get_config(self):
-        config = super().get_config()
-        config.update({
-            "keep_dim":self.keep_dim,
-        })
-        return config
-@tf.keras.utils.register_keras_serializable(package="gcvit")
-class FeatExtract(tf.keras.layers.Layer):
-    def __init__(self, keep_dim=False, **kwargs):
-        super().__init__(**kwargs)
-        self.keep_dim = keep_dim
-    def build(self, input_shape):
-        dim = input_shape[-1]
-        self.pad1 = tf.keras.layers.ZeroPadding2D(1, name='pad1')
-        self.pad2 = tf.keras.layers.ZeroPadding2D(1, name='pad2')
-        self.conv = [
-            tf.keras.layers.DepthwiseConv2D(kernel_size=3, strides=1, padding='valid', use_bias=False, name='conv/0'),
-            tf.keras.layers.Activation('gelu', name='conv/1'),
-            SE(name='conv/2'),
-            tf.keras.layers.Conv2D(dim, kernel_size=1, strides=1, padding='valid', use_bias=False, name='conv/3')
-        ]
-        if not self.keep_dim:
-            self.pool = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='valid', name='pool')
-        # else:
-        #     self.pool = tf.keras.layers.Activation('linear', name='identity')  # hack for PyTorch nn.Identity layer ;)
-        super().build(input_shape)
-    def call(self, inputs, **kwargs):
-        x = inputs
-        xr = self.pad1(x)
-        for layer in self.conv:
-            xr = layer(xr)
-        x = x + xr # if pad had weights it would've thrown error with .save_weights()
-        if not self.keep_dim:
-            x = self.pad2(x)
-            x = self.pool(x)
-        return x
-    def get_config(self):
-        config = super().get_config()
-        config.update({
-            "keep_dim":self.keep_dim,
-        })
-        return config
-@tf.keras.utils.register_keras_serializable(package="gcvit")
-class Resizing(tf.keras.layers.Layer):
-    def __init__(self,
-               height,
-               width,
-               interpolation='bilinear',
-               **kwargs):
-        self.height = height
-        self.width = width
-        self.interpolation = interpolation
-        super().__init__(**kwargs)
-    def call(self, inputs):
-        # tf.image.resize will always output float32 and operate more efficiently on
-        # float32 unless interpolation is nearest, in which case ouput type matches
-        # input type.
-        if self.interpolation == 'nearest':
-            input_dtype = self.compute_dtype
-        else:
-            input_dtype = tf.float32
-        inputs = tf.cast(inputs, dtype=input_dtype)
-        size = [self.height, self.width]
-        outputs = tf.image.resize(
-            inputs,
-            size=size,
-            method=self.interpolation)
-        return tf.cast(outputs, self.compute_dtype)
-    def compute_output_shape(self, input_shape):
-        input_shape = tf.TensorShape(input_shape).as_list()
-        input_shape[H_AXIS] = self.height
-        input_shape[W_AXIS] = self.width
-        return tf.TensorShape(input_shape)
-    def get_config(self):
-        config = super().get_config()
-        config.update({
-            'height': self.height,
-            'width': self.width,
-            'interpolation': self.interpolation,
-            })
         return config

+import tensorflow as tf
+import tensorflow_addons as tfa
+H_AXIS = -3
+W_AXIS = -2
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class Mlp(tf.keras.layers.Layer):
+    def __init__(self, hidden_features=None, out_features=None, act_layer='gelu', dropout=0., **kwargs):
+        super().__init__(**kwargs)
+        self.hidden_features = hidden_features
+        self.out_features = out_features
+        self.act_layer = act_layer
+        self.dropout = dropout
+    def build(self, input_shape):
+        self.in_features = input_shape[-1]
+        self.hidden_features = self.hidden_features or self.in_features
+        self.out_features = self.out_features or self.in_features
+        self.fc1 = tf.keras.layers.Dense(self.hidden_features, name="fc1")
+        self.act = tf.keras.layers.Activation(self.act_layer, name="act")
+        self.fc2 = tf.keras.layers.Dense(self.out_features, name="fc2")
+        self.drop1 = tf.keras.layers.Dropout(self.dropout, name="drop1")
+        self.drop2 = tf.keras.layers.Dropout(self.dropout, name="drop2")
+        super().build(input_shape)
+    def call(self, inputs, **kwargs):
+        x = self.fc1(inputs)
+        x = self.act(x)
+        x = self.drop1(x)
+        x = self.fc2(x)
+        x = self.drop2(x)
+        return x
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "hidden_features":self.hidden_features,
+            "out_features":self.out_features,
+            "act_layer":self.act_layer,
+            "dropout":self.dropout
+            })
+        return config
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class SE(tf.keras.layers.Layer):
+    def __init__(self, oup=None, expansion=0.25, **kwargs):
+        super().__init__(**kwargs)
+        self.expansion = expansion
+        self.oup = oup
+    def build(self, input_shape):
+        inp = input_shape[-1]
+        self.oup = self.oup or inp
+        self.avg_pool = tfa.layers.AdaptiveAveragePooling2D(1, name="avg_pool")
+        self.fc = [
+            tf.keras.layers.Dense(int(inp * self.expansion), use_bias=False, name='fc/0'),
+            tf.keras.layers.Activation('gelu', name='fc/1'),
+            tf.keras.layers.Dense(self.oup, use_bias=False, name='fc/2'),
+            tf.keras.layers.Activation('sigmoid', name='fc/3')
+            ]
+        super().build(input_shape)
+    def call(self, inputs, **kwargs):
+        b, _, _, c = tf.unstack(tf.shape(inputs), num=4)
+        x = tf.reshape(self.avg_pool(inputs), (b, c))
+        for layer in self.fc:
+            x = layer(x)
+        x = tf.reshape(x, (b, 1, 1, c))
+        return x*inputs
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            'expansion': self.expansion,
+            'oup': self.oup,
+            })
+        return config
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class ReduceSize(tf.keras.layers.Layer):
+    def __init__(self, keep_dim=False, **kwargs):
+        super().__init__(**kwargs)
+        self.keep_dim = keep_dim
+    def build(self, input_shape):
+        dim = input_shape[-1]
+        dim_out = dim if self.keep_dim else 2*dim
+        self.pad1 = tf.keras.layers.ZeroPadding2D(1, name='pad1')
+        self.pad2 = tf.keras.layers.ZeroPadding2D(1, name='pad2')
+        self.conv = [
+            tf.keras.layers.DepthwiseConv2D(kernel_size=3, strides=1, padding='valid', use_bias=False, name='conv/0'),
+            tf.keras.layers.Activation('gelu', name='conv/1'),
+            SE(name='conv/2'),
+            tf.keras.layers.Conv2D(dim, kernel_size=1, strides=1, padding='valid', use_bias=False, name='conv/3')
+        ]
+        self.reduction = tf.keras.layers.Conv2D(dim_out, kernel_size=3, strides=2, padding='valid', use_bias=False,
+                                                name='reduction')
+        self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm1')  # eps like PyTorch
+        self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm2')
+        super().build(input_shape)
+    def call(self, inputs, **kwargs):
+        x = self.norm1(inputs)
+        xr = self.pad1(x)  # if pad had weights it would've thrown error with .save_weights()
+        for layer in self.conv:
+            xr = layer(xr)
+        x = x + xr
+        x = self.pad2(x)
+        x = self.reduction(x)
+        x = self.norm2(x)
+        return x
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "keep_dim":self.keep_dim,
+        })
+        return config
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class FeatExtract(tf.keras.layers.Layer):
+    def __init__(self, keep_dim=False, **kwargs):
+        super().__init__(**kwargs)
+        self.keep_dim = keep_dim
+    def build(self, input_shape):
+        dim = input_shape[-1]
+        self.pad1 = tf.keras.layers.ZeroPadding2D(1, name='pad1')
+        self.pad2 = tf.keras.layers.ZeroPadding2D(1, name='pad2')
+        self.conv = [
+            tf.keras.layers.DepthwiseConv2D(kernel_size=3, strides=1, padding='valid', use_bias=False, name='conv/0'),
+            tf.keras.layers.Activation('gelu', name='conv/1'),
+            SE(name='conv/2'),
+            tf.keras.layers.Conv2D(dim, kernel_size=1, strides=1, padding='valid', use_bias=False, name='conv/3')
+        ]
+        if not self.keep_dim:
+            self.pool = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='valid', name='pool')
+        # else:
+        #     self.pool = tf.keras.layers.Activation('linear', name='identity')  # hack for PyTorch nn.Identity layer ;)
+        super().build(input_shape)
+    def call(self, inputs, **kwargs):
+        x = inputs
+        xr = self.pad1(x)
+        for layer in self.conv:
+            xr = layer(xr)
+        x = x + xr # if pad had weights it would've thrown error with .save_weights()
+        if not self.keep_dim:
+            x = self.pad2(x)
+            x = self.pool(x)
+        return x
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "keep_dim":self.keep_dim,
+        })
+        return config
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class  GlobalQueryGen(tf.keras.layers.Layer):
+    """
+    Global query generator based on: "Hatamizadeh et al.,
+    Global Context Vision Transformers <https://arxiv.org/abs/2206.09959>"
+    """
+    def __init__(self, keep_dims=False, **kwargs):
+        super().__init__(**kwargs)
+        self.keep_dims = keep_dims
+    def build(self, input_shape):
+        self.to_q_global = [FeatExtract(keep_dim, name=f'to_q_global/{i}') \
+                            for i, keep_dim in enumerate(self.keep_dims)]
+        super().build(input_shape)
+    def call(self, inputs, **kwargs):
+        x = inputs
+        for layer in self.to_q_global:
+            x = layer(x)
+        return x
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "keep_dims":self.keep_dims,
+        })
+        return config
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class Resizing(tf.keras.layers.Layer):
+    def __init__(self,
+               height,
+               width,
+               interpolation='bilinear',
+               **kwargs):
+        self.height = height
+        self.width = width
+        self.interpolation = interpolation
+        super().__init__(**kwargs)
+    def call(self, inputs):
+        # tf.image.resize will always output float32 and operate more efficiently on
+        # float32 unless interpolation is nearest, in which case ouput type matches
+        # input type.
+        if self.interpolation == 'nearest':
+            input_dtype = self.compute_dtype
+        else:
+            input_dtype = tf.float32
+        inputs = tf.cast(inputs, dtype=input_dtype)
+        size = [self.height, self.width]
+        outputs = tf.image.resize(
+            inputs,
+            size=size,
+            method=self.interpolation)
+        return tf.cast(outputs, self.compute_dtype)
+    def compute_output_shape(self, input_shape):
+        input_shape = tf.TensorShape(input_shape).as_list()
+        input_shape[H_AXIS] = self.height
+        input_shape[W_AXIS] = self.width
+        return tf.TensorShape(input_shape)
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            'height': self.height,
+            'width': self.width,
+            'interpolation': self.interpolation,
+            })
+        return config
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class FitWindow(tf.keras.layers.Layer):
+    "Pad feature to fit window"
+    def __init__(self, window_size, **kwargs):
+        super().__init__(**kwargs)
+        self.window_size = window_size
+    def call(self, inputs):
+        B, H, W, C = tf.unstack(tf.shape(inputs), num=4)
+        # pad to multiple of window_size
+        h_pad = (self.window_size - H % self.window_size) % self.window_size
+        w_pad = (self.window_size - W % self.window_size) % self.window_size
+        x = tf.pad(inputs, [[0, 0],
+                            [h_pad//2, (h_pad//2 + h_pad%2)],  # padding in both directions unlike tfgcvit
+                            [w_pad//2, (w_pad//2 + w_pad%2)],
+                            [0, 0]])
+        return x
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            'window_size': self.window_size,
+            })
         return config

gcvit/layers/level.py CHANGED Viewed

@@ -1,93 +1,85 @@
-import tensorflow as tf
-from .feature import FeatExtract, ReduceSize, Resizing
-from .block import GCViTBlock
-@tf.keras.utils.register_keras_serializable(package="gcvit")
-class GCViTLayer(tf.keras.layers.Layer):
-    def __init__(self, depth, num_heads, window_size, keep_dims, downsample=True, mlp_ratio=4., qkv_bias=True,
-                qk_scale=None, drop=0., attn_drop=0., path_drop=0., layer_scale=None, resize_query=False, **kwargs):
-        super().__init__(**kwargs)
-        self.depth = depth
-        self.num_heads = num_heads
-        self.window_size = window_size
-        self.keep_dims = keep_dims
-        self.downsample = downsample
-        self.mlp_ratio = mlp_ratio
-        self.qkv_bias = qkv_bias
-        self.qk_scale = qk_scale
-        self.drop = drop
-        self.attn_drop = attn_drop
-        self.path_drop = path_drop
-        self.layer_scale = layer_scale
-        self.resize_query = resize_query
-    def build(self, input_shape):
-        path_drop = [self.path_drop] * self.depth if not isinstance(self.path_drop, list) else self.path_drop
-        self.blocks = [
-            GCViTBlock(window_size=self.window_size,
-                      num_heads=self.num_heads,
-                      global_query=bool(i % 2),
-                      mlp_ratio=self.mlp_ratio,
-                      qkv_bias=self.qkv_bias,
-                      qk_scale=self.qk_scale,
-                      drop=self.drop,
-                      attn_drop=self.attn_drop,
-                      path_drop=path_drop[i],
-                      layer_scale=self.layer_scale,
-                      name=f'blocks/{i}')
-            for i in range(self.depth)]
-        self.down = ReduceSize(keep_dim=False, name='downsample')
-        self.to_q_global = [
-            FeatExtract(keep_dim, name=f'to_q_global/{i}')
-            for i, keep_dim in enumerate(self.keep_dims)]
-        self.resize = Resizing(self.window_size, self.window_size, interpolation='bicubic')
-        super().build(input_shape)
-    def call(self, inputs, **kwargs):
-        height, width = tf.unstack(tf.shape(inputs)[1:3], num=2)
-        # pad to multiple of window_size
-        h_pad = (self.window_size - height % self.window_size) % self.window_size
-        w_pad = (self.window_size - width % self.window_size) % self.window_size
-        x = tf.pad(inputs, [[0, 0],
-                            [h_pad//2, (h_pad//2 + h_pad%2)],  # padding in both directions unlike tfgcvit
-                            [w_pad//2, (w_pad//2 + w_pad%2)],
-                            [0, 0]])
-        # generate global query
-        q_global = x  # (B, H, W, C)
-        for layer in self.to_q_global:
-            q_global = layer(q_global)  #  official impl issue: https://github.com/NVlabs/GCVit/issues/13
-        # resize query to fit key-value, but result in poor score with official weights?
-        if self.resize_query:
-            q_global = self.resize(q_global)  # to avoid mismatch between feat_map and q_global: https://github.com/NVlabs/GCVit/issues/9
-        # feature_map -> windows -> window_attention -> feature_map
-        for i, blk in enumerate(self.blocks):
-            if i % 2:
-                x = blk([x, q_global])
-            else:
-                x = blk([x])
-        x = x[:, :height, :width, :]  # https://github.com/NVlabs/GCVit/issues/9
-        # set shape for [B, ?, ?, C]
-        x.set_shape(inputs.shape)  # `tf.reshape` creates new tensor with new_shape
-        # downsample
-        if self.downsample:
-          x = self.down(x)
-        return x
-    def get_config(self):
-        config = super().get_config()
-        config.update({
-            'depth': self.depth,
-            'num_heads': self.num_heads,
-            'window_size': self.window_size,
-            'keep_dims': self.keep_dims,
-            'downsample': self.downsample,
-            'mlp_ratio': self.mlp_ratio,
-            'qkv_bias': self.qkv_bias,
-            'qk_scale': self.qk_scale,
-            'drop': self.drop,
-            'attn_drop': self.attn_drop,
-            'path_drop': self.path_drop,
-            'layer_scale': self.layer_scale
-        })
         return config

+import tensorflow as tf
+from .feature import GlobalQueryGen, ReduceSize, Resizing, FitWindow
+from .block import GCViTBlock
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class GCViTLevel(tf.keras.layers.Layer):
+    """One GCViT level: window padding, global-query generation, a stack of
+    `GCViTBlock`s and an optional downsampling step.
+
+    Args:
+        depth: number of `GCViTBlock`s in the level.
+        num_heads: forwarded to every `GCViTBlock`.
+        window_size: forwarded to every `GCViTBlock`; also the size the input is
+            padded to a multiple of, and the target size of the optional query resize.
+        keep_dims: forwarded to `GlobalQueryGen`.
+        downsample: if true, apply `ReduceSize` to the output of the block stack.
+        mlp_ratio, qkv_bias, qk_scale, drop, attn_drop, layer_scale: forwarded
+            unchanged to every `GCViTBlock`.
+        path_drop: a single value shared by all blocks, or a list holding one
+            value per block.
+        resize_query: if true, resize the global query to
+            (window_size, window_size) with bicubic interpolation.
+    """
+    def __init__(self, depth, num_heads, window_size, keep_dims, downsample=True, mlp_ratio=4., qkv_bias=True,
+                qk_scale=None, drop=0., attn_drop=0., path_drop=0., layer_scale=None, resize_query=False, **kwargs):
+        super().__init__(**kwargs)
+        self.depth = depth
+        self.num_heads = num_heads
+        self.window_size = window_size
+        self.keep_dims = keep_dims
+        self.downsample = downsample
+        self.mlp_ratio = mlp_ratio
+        self.qkv_bias = qkv_bias
+        self.qk_scale = qk_scale
+        self.drop = drop
+        self.attn_drop = attn_drop
+        self.path_drop = path_drop
+        self.layer_scale = layer_scale
+        self.resize_query = resize_query
+    def build(self, input_shape):
+        """Create the sub-layers; odd-indexed blocks are the ones using the global query."""
+        # broadcast a scalar drop-path rate to one entry per block
+        path_drop = [self.path_drop] * self.depth if not isinstance(self.path_drop, list) else self.path_drop
+        self.blocks = [
+            GCViTBlock(window_size=self.window_size,
+                      num_heads=self.num_heads,
+                      global_query=bool(i % 2),
+                      mlp_ratio=self.mlp_ratio,
+                      qkv_bias=self.qkv_bias,
+                      qk_scale=self.qk_scale,
+                      drop=self.drop,
+                      attn_drop=self.attn_drop,
+                      path_drop=path_drop[i],
+                      layer_scale=self.layer_scale,
+                      name=f'blocks/{i}')
+            for i in range(self.depth)]
+        self.down = ReduceSize(keep_dim=False, name='downsample')
+        self.q_global_gen = GlobalQueryGen(self.keep_dims, name='q_global_gen')
+        self.resize = Resizing(self.window_size, self.window_size, interpolation='bicubic')
+        self.fit_window = FitWindow(self.window_size)
+        super().build(input_shape)
+    def call(self, inputs, **kwargs):
+        """Run the level on `inputs` and return the (optionally downsampled) feature map."""
+        # remember the un-padded spatial size so the padding can be cropped off again
+        H, W = tf.unstack(tf.shape(inputs)[1:3], num=2)
+        # pad to fit window_size
+        x = self.fit_window(inputs)
+        # generate global query
+        q_global = self.q_global_gen(x) # (B, H, W, C)  # official impl issue: https://github.com/NVlabs/GCVit/issues/13
+        # resize query to fit key-value, but result in poor score with official weights?
+        if self.resize_query:
+            q_global = self.resize(q_global)  # to avoid mismatch between feat_map and q_global: https://github.com/NVlabs/GCVit/issues/9
+        # feature_map -> windows -> window_attention -> feature_map
+        for i, blk in enumerate(self.blocks):
+            if i % 2:
+                x = blk([x, q_global])
+            else:
+                x = blk([x])
+        x = x[:, :H, :W, :]  # https://github.com/NVlabs/GCVit/issues/9
+        # set shape for [B, ?, ?, C]
+        x.set_shape(inputs.shape)  # `tf.reshape` creates new tensor with new_shape
+        # downsample
+        if self.downsample:
+            x = self.down(x)
+        return x
+    def get_config(self):
+        """Return the base layer config extended with every constructor argument."""
+        config = super().get_config()
+        config.update({
+            'depth': self.depth,
+            'num_heads': self.num_heads,
+            'window_size': self.window_size,
+            'keep_dims': self.keep_dims,
+            'downsample': self.downsample,
+            'mlp_ratio': self.mlp_ratio,
+            'qkv_bias': self.qkv_bias,
+            'qk_scale': self.qk_scale,
+            'drop': self.drop,
+            'attn_drop': self.attn_drop,
+            'path_drop': self.path_drop,
+            'layer_scale': self.layer_scale,
+            # without this entry a level saved with resize_query=True is restored with the default False
+            'resize_query': self.resize_query,
+        })
         return config

gcvit/models/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- from .gcvit import GCViT, GCViTTiny, GCViTSmall, GCViTBase


1	+ from .gcvit import GCViT, GCViTXXTiny, GCViTXTiny, GCViTTiny, GCViTSmall, GCViTBase

gcvit/models/__pycache__/__init__.cpython-38.pyc DELETED Viewed

Binary file (234 Bytes)

gcvit/models/__pycache__/gcvit.cpython-38.pyc DELETED Viewed

Binary file (4.08 kB)

gcvit/models/gcvit.py CHANGED Viewed

@@ -1,145 +1,180 @@
-import numpy as np
-import tensorflow as tf
-from ..layers import PatchEmbed, GCViTLayer, Identity
-BASE_URL = 'https://github.com/awsaf49/gcvit-tf/releases/download'
-TAG = 'v1.0.4'
-NAME2CONFIG = {
-    'gcvit_tiny': {'window_size': (7, 7, 14, 7),
-                    'dim': 64,
-                    'depths': (3, 4, 19, 5),
-                    'num_heads': (2, 4, 8, 16),
-                    'path_drop': 0.2,},
-    'gcvit_small': {'window_size': (7, 7, 14, 7),
-                     'dim': 96,
-                     'depths': (3, 4, 19, 5),
-                     'num_heads': (3, 6, 12, 24),
-                     'mlp_ratio': 2.,
-                     'path_drop': 0.3,
-                     'layer_scale': 1e-5,},
-    'gcvit_base': {'window_size': (7, 7, 14, 7),
-                    'dim':128,
-                    'depths': (3, 4, 19, 5),
-                    'num_heads': (4, 8, 16, 32),
-                    'mlp_ratio': 2.,
-                    'path_drop': 0.5,
-                    'layer_scale': 1e-5,},
-    }
-@tf.keras.utils.register_keras_serializable(package='gcvit')
-class GCViT(tf.keras.Model):
-    def __init__(self, window_size, dim, depths, num_heads,
-        drop_rate=0., mlp_ratio=3., qkv_bias=True, qk_scale=None, attn_drop=0., path_drop=0.1, layer_scale=None, resize_query=False,
-        global_pool='avg', num_classes=1000, head_act='softmax', **kwargs):
-        super().__init__(**kwargs)
-        self.window_size = window_size
-        self.dim = dim
-        self.depths = depths
-        self.num_heads = num_heads
-        self.drop_rate = drop_rate
-        self.mlp_ratio = mlp_ratio
-        self.qkv_bias = qkv_bias
-        self.qk_scale = qk_scale
-        self.attn_drop = attn_drop
-        self.path_drop = path_drop
-        self.layer_scale = layer_scale
-        self.resize_query = resize_query
-        self.global_pool = global_pool
-        self.num_classes = num_classes
-        self.head_act = head_act
-        self.patch_embed = PatchEmbed(dim=dim, name='patch_embed')
-        self.pos_drop = tf.keras.layers.Dropout(drop_rate, name='pos_drop')
-        path_drops = np.linspace(0., path_drop, sum(depths))
-        keep_dims = [(False, False, False),(False, False),(True,),(True,),]
-        self.levels = []
-        for i in range(len(depths)):
-            path_drop = path_drops[sum(depths[:i]):sum(depths[:i + 1])].tolist()
-            level = GCViTLayer(depth=depths[i], num_heads=num_heads[i], window_size=window_size[i], keep_dims=keep_dims[i],
-                    downsample=(i < len(depths) - 1), mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
-                    drop=drop_rate, attn_drop=attn_drop, path_drop=path_drop, layer_scale=layer_scale, resize_query=resize_query,
-                    name=f'levels/{i}')
-            self.levels.append(level)
-        self.norm = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm')
-        if global_pool == 'avg':
-            self.pool = tf.keras.layers.GlobalAveragePooling2D(name='pool')
-        elif global_pool == 'max':
-            self.pool = tf.keras.layers.GlobalMaxPooling2D(name='pool')
-        elif global_pool is None:
-            self.pool = Identity(name='pool')
-        else:
-            raise ValueError(f'Expecting pooling to be one of None/avg/max. Found: {global_pool}')
-        self.head = [tf.keras.layers.Dense(num_classes, name='head/fc'),
-                     tf.keras.layers.Activation(head_act, name='head/act')]
-    def reset_classifier(self, num_classes, head_act, global_pool=None):
-        self.num_classes = num_classes
-        if global_pool is not None:
-            self.global_pool = global_pool
-        self.head[0] = tf.keras.layers.Dense(num_classes, name='head/fc') if num_classes else Identity(name='head/fc')
-        self.head[1] = tf.keras.layers.Activation(head_act, name='head/act') if head_act else Identity(name='head/act')
-        super().build((1, 224, 224, 3))
-    def forward_features(self, inputs):
-        x = self.patch_embed(inputs)
-        x = self.pos_drop(x)
-        x = tf.cast(x, dtype=tf.float32)
-        for level in self.levels:
-            x = level(x)
-        x = self.norm(x)
-        return x
-    def forward_head(self, inputs, pre_logits=False):
-        x = inputs
-        if self.global_pool in ['avg', 'max']:
-            x = self.pool(x)
-        if not pre_logits:
-            for layer in self.head:
-                x = layer(x)
-        return x
-    def call(self, inputs, **kwargs):
-        x = self.forward_features(inputs)
-        x = self.forward_head(x)
-        return x
-    def build_graph(self, input_shape=(224, 224, 3)):
-        """https://www.kaggle.com/code/ipythonx/tf-hybrid-efficientnet-swin-transformer-gradcam"""
-        x = tf.keras.Input(shape=input_shape)
-        return tf.keras.Model(inputs=[x], outputs=self.call(x), name=self.name)
-# load standard models
-def GCViTTiny(pretrain=False, **kwargs):
-    name = 'gcvit_tiny'
-    config = NAME2CONFIG[name]
-    ckpt_link = '{}/{}/{}_weights.h5'.format(BASE_URL, TAG, name)
-    model = GCViT(name=name, **config, **kwargs)
-    model(tf.random.uniform(shape=(1, 224, 224, 3)))
-    if pretrain:
-        ckpt_path = tf.keras.utils.get_file('{}_weights.h5'.format(name), ckpt_link)
-        model.load_weights(ckpt_path)
-    return model
-def GCViTSmall(pretrain=False, **kwargs):
-    name = 'gcvit_small'
-    config = NAME2CONFIG[name]
-    ckpt_link = '{}/{}/{}_weights.h5'.format(BASE_URL, TAG, name)
-    model = GCViT(name=name, **config, **kwargs)
-    model(tf.random.uniform(shape=(1, 224, 224, 3)))
-    if pretrain:
-        ckpt_path = tf.keras.utils.get_file('{}_weights.h5'.format(name), ckpt_link)
-        model.load_weights(ckpt_path)
-    return model
-def GCViTBase(pretrain=False, **kwargs):
-    name = 'gcvit_base'
-    config = NAME2CONFIG[name]
-    ckpt_link = '{}/{}/{}_weights.h5'.format(BASE_URL, TAG, name)
-    model = GCViT(name=name, **config, **kwargs)
-    model(tf.random.uniform(shape=(1, 224, 224, 3)))
-    if pretrain:
-        ckpt_path = tf.keras.utils.get_file('{}_weights.h5'.format(name), ckpt_link)
-        model.load_weights(ckpt_path)
-    return model

+import numpy as np
+import tensorflow as tf
+from ..layers import Stem, GCViTLevel, Identity
+# Base URL and release tag from which the factory functions below build the
+# `<BASE_URL>/<TAG>/<name>_weights.h5` checkpoint link.
+BASE_URL = 'https://github.com/awsaf49/gcvit-tf/releases/download'
+TAG = 'v1.0.9'
+# Keyword arguments passed to `GCViT(...)` for each named model variant.
+# The tuples hold one entry per level (four levels in every variant).
+NAME2CONFIG = {
+    'gcvit_xxtiny': {'window_size': (7, 7, 14, 7),
+                    'dim': 64,
+                    'depths': (2, 2, 6, 2),
+                    'num_heads': (2, 4, 8, 16),
+                    'mlp_ratio': 3.,
+                    'path_drop': 0.2},
+    'gcvit_xtiny': {'window_size': (7, 7, 14, 7),
+                    'dim': 64,
+                    'depths': (3, 4, 6, 5),
+                    'num_heads': (2, 4, 8, 16),
+                    'mlp_ratio': 3.,
+                    'path_drop': 0.2},
+    'gcvit_tiny': {'window_size': (7, 7, 14, 7),
+                    'dim': 64,
+                    'depths': (3, 4, 19, 5),
+                    'num_heads': (2, 4, 8, 16),
+                    'mlp_ratio': 3.,
+                    'path_drop': 0.2,},
+    'gcvit_small': {'window_size': (7, 7, 14, 7),
+                     'dim': 96,
+                     'depths': (3, 4, 19, 5),
+                     'num_heads': (3, 6, 12, 24),
+                     'mlp_ratio': 2.,
+                     'path_drop': 0.3,
+                     'layer_scale': 1e-5,},
+    'gcvit_base': {'window_size': (7, 7, 14, 7),
+                    'dim':128,
+                    'depths': (3, 4, 19, 5),
+                    'num_heads': (4, 8, 16, 32),
+                    'mlp_ratio': 2.,
+                    'path_drop': 0.5,
+                    'layer_scale': 1e-5,},
+    }
+@tf.keras.utils.register_keras_serializable(package='gcvit')
+class GCViT(tf.keras.Model):
+    """GCViT classifier: stem -> dropout -> stacked `GCViTLevel`s -> layer norm
+    -> optional global pooling -> dense head.
+
+    Args:
+        window_size, depths, num_heads: per-level sequences; entry `i` configures level `i`.
+        dim: forwarded to the `Stem` layer.
+        drop_rate: rate of the dropout applied after the stem; also forwarded to each level as `drop`.
+        mlp_ratio, qkv_bias, qk_scale, attn_drop, layer_scale, resize_query: forwarded to every level.
+        path_drop: upper end of the drop-path schedule, which grows linearly from 0 over all blocks.
+        global_pool: 'avg', 'max' or None (no pooling).
+        num_classes: number of units of the dense head.
+        head_act: activation of the dense head.
+
+    Raises:
+        ValueError: if `global_pool` is not one of None/'avg'/'max'.
+    """
+    def __init__(self, window_size, dim, depths, num_heads,
+        drop_rate=0., mlp_ratio=3., qkv_bias=True, qk_scale=None, attn_drop=0., path_drop=0.1, layer_scale=None, resize_query=False,
+        global_pool='avg', num_classes=1000, head_act='softmax', **kwargs):
+        super().__init__(**kwargs)
+        self.window_size = window_size
+        self.dim = dim
+        self.depths = depths
+        self.num_heads = num_heads
+        self.drop_rate = drop_rate
+        self.mlp_ratio = mlp_ratio
+        self.qkv_bias = qkv_bias
+        self.qk_scale = qk_scale
+        self.attn_drop = attn_drop
+        self.path_drop = path_drop
+        self.layer_scale = layer_scale
+        self.resize_query = resize_query
+        self.global_pool = global_pool
+        self.num_classes = num_classes
+        self.head_act = head_act
+        self.patch_embed = Stem(dim=dim, name='patch_embed')
+        self.pos_drop = tf.keras.layers.Dropout(drop_rate, name='pos_drop')
+        # one drop-path rate per block, increasing linearly across the whole network
+        path_drops = np.linspace(0., path_drop, sum(depths))
+        keep_dims = [(False, False, False),(False, False),(True,),(True,),]
+        self.levels = []
+        for i in range(len(depths)):
+            # slice out the rates that belong to the blocks of level i
+            level_path_drop = path_drops[sum(depths[:i]):sum(depths[:i + 1])].tolist()
+            level = GCViTLevel(depth=depths[i], num_heads=num_heads[i], window_size=window_size[i], keep_dims=keep_dims[i],
+                    downsample=(i < len(depths) - 1), mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
+                    drop=drop_rate, attn_drop=attn_drop, path_drop=level_path_drop, layer_scale=layer_scale, resize_query=resize_query,
+                    name=f'levels/{i}')
+            self.levels.append(level)
+        self.norm = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm')
+        self.pool = self._make_pool(global_pool)
+        self.head = tf.keras.layers.Dense(num_classes, name='head', activation=head_act)
+    @staticmethod
+    def _make_pool(global_pool):
+        """Return the pooling layer for `global_pool`; raise ValueError for unknown values."""
+        if global_pool == 'avg':
+            return tf.keras.layers.GlobalAveragePooling2D(name='pool')
+        if global_pool == 'max':
+            return tf.keras.layers.GlobalMaxPooling2D(name='pool')
+        if global_pool is None:
+            return Identity(name='pool')
+        raise ValueError(f'Expecting pooling to be one of None/avg/max. Found: {global_pool}')
+    def reset_classifier(self, num_classes, head_act, global_pool=None, in_channels=3):
+        """Replace the classification head, and optionally the pooling layer.
+
+        Args:
+            num_classes: units of the new dense head; a falsy value installs an Identity head.
+            head_act: activation of the new dense head.
+            global_pool: 'avg' or 'max' to switch pooling; None keeps the current pooling.
+            in_channels: channel count of the input shape used to rebuild the model.
+
+        Raises:
+            ValueError: if `global_pool` is given but is not 'avg' or 'max'.
+        """
+        self.num_classes = num_classes
+        # keep the stored attribute in sync with the head that is actually installed
+        self.head_act = head_act
+        if global_pool is not None:
+            # rebuild the layer as well: `forward_head` applies `self.pool`, so updating
+            # only the `global_pool` string would keep pooling with the old layer
+            self.pool = self._make_pool(global_pool)
+            self.global_pool = global_pool
+        self.head = tf.keras.layers.Dense(num_classes, name='head', activation=head_act) if num_classes else Identity(name='head')
+        super().build((1, 224, 224, in_channels)) # for head we only need info from the input channel
+    def forward_features(self, inputs):
+        """Return the normalized feature map produced by the stem and all levels."""
+        x = self.patch_embed(inputs)
+        x = self.pos_drop(x)
+        x = tf.cast(x, dtype=tf.float32)
+        for level in self.levels:
+            x = level(x)
+        x = self.norm(x)
+        return x
+    def forward_head(self, inputs, pre_logits=False):
+        """Pool `inputs` (when pooling is enabled) and apply the head unless `pre_logits` is true."""
+        x = inputs
+        if self.global_pool in ['avg', 'max']:
+            x = self.pool(x)
+        if not pre_logits:
+            x = self.head(x)
+        return x
+    def call(self, inputs, **kwargs):
+        x = self.forward_features(inputs)
+        x = self.forward_head(x)
+        return x
+    def build_graph(self, input_shape=(224, 224, 3)):
+        """https://www.kaggle.com/code/ipythonx/tf-hybrid-efficientnet-swin-transformer-gradcam"""
+        x = tf.keras.Input(shape=input_shape)
+        return tf.keras.Model(inputs=[x], outputs=self.call(x), name=self.name)
+    def summary(self, input_shape=(224, 224, 3)):
+        """Return the summary of the functional model created by `build_graph(input_shape)`."""
+        return self.build_graph(input_shape).summary()
+# load standard models
+def _build_gcvit(name, input_shape, pretrain, resize_query, **kwargs):
+    """Create the `name` variant from NAME2CONFIG, build it and optionally load its released weights."""
+    config = NAME2CONFIG[name]
+    ckpt_link = '{}/{}/{}_weights.h5'.format(BASE_URL, TAG, name)
+    model = GCViT(name=name, resize_query=resize_query, **config, **kwargs)
+    # call the model once on a random single-image batch before any weights are loaded
+    model(tf.random.uniform(shape=input_shape)[tf.newaxis,])
+    if pretrain:
+        ckpt_path = tf.keras.utils.get_file('{}_weights.h5'.format(name), ckpt_link)
+        model.load_weights(ckpt_path)
+    return model
+def GCViTXXTiny(input_shape=(224, 224, 3), pretrain=False, resize_query=False, **kwargs):
+    """Build the 'gcvit_xxtiny' model; `pretrain=True` downloads and loads its weights."""
+    return _build_gcvit('gcvit_xxtiny', input_shape, pretrain, resize_query, **kwargs)
+def GCViTXTiny(input_shape=(224, 224, 3), pretrain=False, resize_query=False, **kwargs):
+    """Build the 'gcvit_xtiny' model; `pretrain=True` downloads and loads its weights."""
+    return _build_gcvit('gcvit_xtiny', input_shape, pretrain, resize_query, **kwargs)
+def GCViTTiny(input_shape=(224, 224, 3), pretrain=False, resize_query=False, **kwargs):
+    """Build the 'gcvit_tiny' model; `pretrain=True` downloads and loads its weights."""
+    return _build_gcvit('gcvit_tiny', input_shape, pretrain, resize_query, **kwargs)
+def GCViTSmall(input_shape=(224, 224, 3), pretrain=False, resize_query=False, **kwargs):
+    """Build the 'gcvit_small' model; `pretrain=True` downloads and loads its weights."""
+    return _build_gcvit('gcvit_small', input_shape, pretrain, resize_query, **kwargs)
+def GCViTBase(input_shape=(224, 224, 3), pretrain=False, resize_query=False, **kwargs):
+    """Build the 'gcvit_base' model; `pretrain=True` downloads and loads its weights."""
+    return _build_gcvit('gcvit_base', input_shape, pretrain, resize_query, **kwargs)

gcvit/utils/gradcam.py CHANGED Viewed

@@ -1,69 +1,69 @@
-import tensorflow as tf
-import matplotlib.cm as cm
-import numpy as np
-try:
-    from tensorflow.keras.utils import array_to_img, img_to_array
-except:
-    from tensorflow.keras.preprocessing.image import array_to_img, img_to_array
-def process_image(img, size=(224, 224)):
-    img_array = tf.keras.applications.imagenet_utils.preprocess_input(img, mode='torch')
-    img_array = tf.image.resize(img_array, size,)[None,]
-    return img_array
-def get_gradcam_model(model):
-    inp = tf.keras.Input(shape=(224, 224, 3))
-    feats = model.forward_features(inp)
-    preds = model.forward_head(feats)
-    return tf.keras.models.Model(inp, [preds, feats])
-def get_gradcam_prediction(img, grad_model, process=True, decode=True, pred_index=None, cmap='jet', alpha=0.4):
-    """Grad-CAM for a single image
-    Args:
-        img (np.ndarray): process or raw image without batch_shape e.g. (224, 224, 3)
-        grad_model (tf.keras.Model): model with feature map and prediction
-        process (bool, optional): imagenet pre-processing. Defaults to True.
-        pred_index (int, optional): for particular calss. Defaults to None.
-        cmap (str, optional): colormap. Defaults to 'jet'.
-        alpha (float, optional): opacity. Defaults to 0.4.
-    Returns:
-        preds_decode: top5 predictions
-        heatmap: gradcam heatmap
-    """
-    # process image for inference
-    if process:
-        img_array = process_image(img)
-    else:
-        img_array = tf.convert_to_tensor(img)[None,]
-        if img.min()!=img.max():
-            img = (img - img.min())/(img.max() - img.min())
-            img = np.uint8(img*255.0)
-    # get prediction
-    with tf.GradientTape(persistent=True) as tape:
-        preds, feats = grad_model(img_array)
-        if pred_index is None:
-            pred_index = tf.argmax(preds[0])
-        class_channel = preds[:, pred_index]
-    # compute heatmap
-    grads = tape.gradient(class_channel, feats)
-    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
-    feats = feats[0]
-    heatmap = feats @ pooled_grads[..., tf.newaxis]
-    heatmap = tf.squeeze(heatmap)
-    heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
-    heatmap = heatmap.numpy()
-    heatmap = np.uint8(255 * heatmap)
-    # colorize heatmap
-    cmap = cm.get_cmap(cmap)
-    colors = cmap(np.arange(256))[:, :3]
-    heatmap = colors[heatmap]
-    heatmap = array_to_img(heatmap)
-    heatmap = heatmap.resize((img.shape[1], img.shape[0]))
-    heatmap = img_to_array(heatmap)
-    overlay = img + heatmap * alpha
-    overlay = array_to_img(overlay)
-    # decode prediction
-    preds_decode = tf.keras.applications.imagenet_utils.decode_predictions(preds.numpy())[0] if decode else preds
     return preds_decode, overlay

+import tensorflow as tf
+import matplotlib.cm as cm
+import numpy as np
+# `array_to_img` / `img_to_array` are importable from one of two locations; try
+# `keras.utils` first and fall back to `keras.preprocessing.image`.
+try:
+    from tensorflow.keras.utils import array_to_img, img_to_array
+except ImportError:  # only a failed import should trigger the fallback
+    from tensorflow.keras.preprocessing.image import array_to_img, img_to_array
+def process_image(img, size=(224, 224)):
+    """Apply 'torch'-mode ImageNet preprocessing, resize to `size` and prepend a batch axis."""
+    normalized = tf.keras.applications.imagenet_utils.preprocess_input(img, mode='torch')
+    resized = tf.image.resize(normalized, size,)
+    return resized[None,]
+def get_gradcam_model(model, input_shape=(224, 224, 3)):
+    """Wrap `model` into a functional model that returns `[predictions, feature_map]`.
+
+    Args:
+        model: object exposing `forward_features` and `forward_head`.
+        input_shape: shape of one input image without the batch axis.
+            Defaults to (224, 224, 3), the previously hard-coded value.
+    """
+    inp = tf.keras.Input(shape=input_shape)
+    feats = model.forward_features(inp)
+    preds = model.forward_head(feats)
+    return tf.keras.models.Model(inp, [preds, feats])
+def get_gradcam_prediction(img, grad_model, process=True, decode=True, pred_index=None, cmap='jet', alpha=0.4):
+    """Grad-CAM for a single image
+    Args:
+        img (np.ndarray): processed or raw image without batch axis e.g. (224, 224, 3)
+        grad_model (tf.keras.Model): model returning (prediction, feature map)
+        process (bool, optional): apply imagenet pre-processing. Defaults to True.
+        decode (bool, optional): decode predictions into imagenet labels. Defaults to True.
+        pred_index (int, optional): class index to explain; the top predicted class if None. Defaults to None.
+        cmap (str, optional): colormap. Defaults to 'jet'.
+        alpha (float, optional): opacity of the heatmap in the overlay. Defaults to 0.4.
+    Returns:
+        preds_decode: decoded top predictions, or the raw predictions when `decode` is False
+        overlay: input image with the colorized gradcam heatmap blended on top
+    """
+    # process image for inference
+    if process:
+        img_array = process_image(img)
+    else:
+        img_array = tf.convert_to_tensor(img)[None,]
+        # rescale the already-processed image to 0..255 so it can be used for the overlay
+        if img.min()!=img.max():
+            img = (img - img.min())/(img.max() - img.min())
+            img = np.uint8(img*255.0)
+    # get prediction; a single gradient is taken, so a non-persistent tape is enough
+    with tf.GradientTape() as tape:
+        preds, feats = grad_model(img_array)
+        if pred_index is None:
+            pred_index = tf.argmax(preds[0])
+        class_channel = preds[:, pred_index]
+    # compute heatmap
+    grads = tape.gradient(class_channel, feats)
+    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
+    feats = feats[0]
+    heatmap = feats @ pooled_grads[..., tf.newaxis]
+    heatmap = tf.squeeze(heatmap)
+    heatmap = tf.maximum(heatmap, 0)
+    # divide_no_nan yields 0 instead of NaN when no activation is positive
+    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
+    heatmap = heatmap.numpy()
+    heatmap = np.uint8(255 * heatmap)
+    # colorize heatmap
+    try:
+        colormap = cm.get_cmap(cmap)
+    except AttributeError:  # `cm.get_cmap` is not available in newer matplotlib releases
+        import matplotlib
+        colormap = matplotlib.colormaps[cmap]
+    colors = colormap(np.arange(256))[:, :3]
+    heatmap = colors[heatmap]
+    heatmap = array_to_img(heatmap)
+    heatmap = heatmap.resize((img.shape[1], img.shape[0]))
+    heatmap = img_to_array(heatmap)
+    overlay = img + heatmap * alpha
+    overlay = array_to_img(overlay)
+    # decode prediction
+    preds_decode = tf.keras.applications.imagenet_utils.decode_predictions(preds.numpy())[0] if decode else preds
     return preds_decode, overlay

gcvit/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.0.3"


1	+ __version__ = "1.0.9"

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
-tensorflow==2.4.1
-tensorflow_addons==0.14.0
-gradio==3.1.0
-numpy
 matplotlib

+tensorflow==2.4.1
+tensorflow_addons==0.14.0
+gradio==3.1.0
+numpy
 matplotlib

setup.py CHANGED Viewed

@@ -1,50 +1,50 @@
-from setuptools import setup, find_packages
-from codecs import open
-from os import path
-here = path.abspath(path.dirname(__file__))
-# Get the long description from the README file
-with open(path.join(here, "README.md"), encoding="utf-8") as f:
-    long_description = f.read()
-with open(path.join(here, 'requirements.txt')) as f:
-    install_requires = [x for x in f.read().splitlines() if len(x)]
-exec(open("gcvit/version.py").read())
-setup(
-    name="gcvit",
-    version=__version__,
-    description="Tensorflow 2.0 Implementation of GCViT: Global Context Vision Transformer. https://github.com/awsaf49/gcvit-tf",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    url="https://github.com/awsaf49/gcvit-tf",
-    author="Awsaf",
-    author_email="awsaf49@gmail.com",
-    classifiers=[
-        # How mature is this project? Common values are
-        #   3 - Alpha
-        #   4 - Beta
-        #   5 - Production/Stable
-        "Development Status :: 3 - Alpha",
-        "Intended Audience :: Developers",
-        "Intended Audience :: Science/Research",
-        "License :: OSI Approved :: Apache Software License",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3.8",
-        "Topic :: Scientific/Engineering",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-        "Topic :: Software Development",
-        "Topic :: Software Development :: Libraries",
-        "Topic :: Software Development :: Libraries :: Python Modules",
-    ],
-    # Note that this is a string of words separated by whitespace, not a list.
-    keywords="tensorflow computer_vision image classification transformer",
-    packages=find_packages(exclude=["tests"]),
-    include_package_data=True,
-    install_requires=install_requires,
-    python_requires=">=3.6",
-    license="MIT",
 )

+"""Packaging script for the ``gcvit`` distribution.
+
+Reads the long description from README.md, the install requirements from
+requirements.txt and the version string from gcvit/version.py, then hands
+everything to ``setuptools.setup``.
+"""
+from setuptools import setup, find_packages
+from codecs import open
+from os import path
+# Every auxiliary file is resolved relative to this script's directory so the
+# build does not depend on the current working directory.
+here = path.abspath(path.dirname(__file__))
+# Get the long description from the README file
+with open(path.join(here, "README.md"), encoding="utf-8") as f:
+    long_description = f.read()
+# One requirement per non-empty line of requirements.txt
+with open(path.join(here, 'requirements.txt')) as f:
+    install_requires = [x for x in f.read().splitlines() if len(x)]
+# Execute version.py so that ``__version__`` (used below) is defined in this
+# namespace; the file is opened via ``here`` and closed deterministically.
+with open(path.join(here, "gcvit", "version.py")) as f:
+    exec(f.read())
+setup(
+    name="gcvit",
+    version=__version__,
+    description="Tensorflow 2.0 Implementation of GCViT: Global Context Vision Transformer. https://github.com/awsaf49/gcvit-tf",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/awsaf49/gcvit-tf",
+    author="Awsaf",
+    author_email="awsaf49@gmail.com",
+    classifiers=[
+        # How mature is this project? Common values are
+        #   3 - Alpha
+        #   4 - Beta
+        #   5 - Production/Stable
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: Apache Software License",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Topic :: Scientific/Engineering",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Topic :: Software Development",
+        "Topic :: Software Development :: Libraries",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+    ],
+    # Note that this is a string of words separated by whitespace, not a list.
+    keywords="tensorflow computer_vision image classification transformer",
+    packages=find_packages(exclude=["tests"]),
+    include_package_data=True,
+    install_requires=install_requires,
+    python_requires=">=3.6",
+    # NOTE(review): the license classifier above says Apache while this field
+    # says MIT -- confirm which one is intended and make the two agree.
+    license="MIT",
 )