import tensorflow as tf
from tensorflow.keras import layers


class PatchExtract(layers.Layer):
    """Cut a batch of images into non-overlapping, flattened patches.

    Args:
        patch_size: ``(size_x, size_y)`` extent of one patch along image
            axes 1 and 2. A single int or a one-element sequence is treated
            as a square patch.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        if isinstance(patch_size, int):
            patch_size = (patch_size, patch_size)
        self.patch_size_x = patch_size[0]
        # Previously index 0 was used for both axes, silently ignoring the
        # second element. Fall back to it only when no second element exists.
        self.patch_size_y = patch_size[1] if len(patch_size) > 1 else patch_size[0]

    def call(self, images):
        """Return patches reshaped to ``(batch, num_patches, patch_dim)``.

        Patches are taken with a stride equal to the patch size and
        ``"VALID"`` padding, so they do not overlap.
        """
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=(1, self.patch_size_x, self.patch_size_y, 1),
            strides=(1, self.patch_size_x, self.patch_size_y, 1),
            rates=(1, 1, 1, 1),
            padding="VALID",
        )
        patch_dim = patches.shape[-1]
        grid_rows, grid_cols = patches.shape[1], patches.shape[2]
        if grid_rows is None or grid_cols is None:
            # Spatial size unknown while tracing: let reshape infer the count.
            num_patches = -1
        else:
            # Rows times columns, not rows squared: the grid need not be square.
            num_patches = grid_rows * grid_cols
        return tf.reshape(patches, (batch_size, num_patches, patch_dim))

    def get_config(self):
        """Return a config from which ``from_config`` can rebuild the layer."""
        config = super().get_config()
        config.update(
            {
                # Matches the constructor argument.
                "patch_size": (self.patch_size_x, self.patch_size_y),
                # Legacy keys, retained for code that reads them directly.
                "patch_size_y": self.patch_size_y,
                "patch_size_x": self.patch_size_x,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Build the layer from a config, accepting the legacy key layout."""
        config = dict(config)
        legacy_x = config.pop("patch_size_x", None)
        legacy_y = config.pop("patch_size_y", None)
        if "patch_size" not in config and legacy_x is not None:
            config["patch_size"] = (
                legacy_x,
                legacy_x if legacy_y is None else legacy_y,
            )
        return cls(**config)


class PatchEmbedding(layers.Layer):
    """Linearly project patches and add a learned position embedding.

    Args:
        num_patch: Number of positions in the position-embedding table.
        embed_dim: Output width of both the projection and the embedding.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, num_patch, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patch = num_patch
        # Kept so get_config can serialize every constructor argument.
        self.embed_dim = embed_dim
        self.proj = layers.Dense(embed_dim)
        self.pos_embed = layers.Embedding(input_dim=num_patch, output_dim=embed_dim)

    def call(self, patch):
        """Return ``proj(patch)`` plus the embeddings of positions 0..num_patch-1."""
        pos = tf.range(start=0, limit=self.num_patch, delta=1)
        return self.proj(patch) + self.pos_embed(pos)

    def get_config(self):
        """Return a config containing every constructor argument."""
        config = super().get_config()
        config.update(
            {
                "num_patch": self.num_patch,
                "embed_dim": self.embed_dim,
            }
        )
        return config


class PatchMerging(layers.Layer):
    """Merge each 2x2 neighbourhood of patches into a single token.

    Args:
        num_patch: ``(height, width)`` of the incoming patch grid.
        embed_dim: The merged tokens are projected to ``2 * embed_dim``.
        **kwargs: Forwarded to ``layers.Layer`` (``name``, ``dtype``, ...), so
            a config produced by ``get_config`` can be fed back to the
            constructor.
    """

    def __init__(self, num_patch, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patch = num_patch
        self.embed_dim = embed_dim
        self.linear_trans = layers.Dense(2 * embed_dim, use_bias=False)

    def call(self, x):
        """Return ``(merged, feat_maps)``.

        ``feat_maps`` is the input reshaped to ``(-1, height, width, C)``.
        ``merged`` is the bias-free projection of the four interleaved
        sub-grids concatenated along the channel axis.
        """
        height, width = self.num_patch
        _, _, C = x.get_shape().as_list()
        x = tf.reshape(x, shape=(-1, height, width, C))
        feat_maps = x
        # Four stride-2 sub-grids. They only have matching shapes (and can be
        # concatenated) when height and width are both even.
        x0 = x[:, 0::2, 0::2, :]
        x1 = x[:, 1::2, 0::2, :]
        x2 = x[:, 0::2, 1::2, :]
        x3 = x[:, 1::2, 1::2, :]
        x = tf.concat((x0, x1, x2, x3), axis=-1)
        x = tf.reshape(x, shape=(-1, (height // 2) * (width // 2), 4 * C))
        return self.linear_trans(x), feat_maps

    def get_config(self):
        """Return a config containing every constructor argument."""
        config = super().get_config()
        config.update({"num_patch": self.num_patch, "embed_dim": self.embed_dim})
        return config