Spaces:

Spidartist
/

AntGenusClassification

Sleeping

App Files Files Community

Spidartist committed on Dec 18, 2023

Commit

88ae77c

•

1 Parent(s): c2fe698

Upload 7 files

Browse files

Files changed (5) hide show

IJEPA_finetune.py +85 -0
app.py +90 -0
downstream-ant-epoch=82-val_loss=0.07.ckpt +3 -0
sample-cifar10-epoch=399-ant.ckpt +3 -0
vit_ijepa_ant_1.pt +3 -0

IJEPA_finetune.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import copy
+import os
+import numpy as np
+import pytorch_lightning as pl
+import torch
+import torch.nn as nn
+from einops import rearrange
+from torchmetrics.functional import accuracy
+from torchmetrics.functional.classification import multiclass_recall, multiclass_precision
+from x_transformers import Encoder, Decoder
+ON_EPOCH = True  # not referenced in the visible code; presumably an on_epoch logging flag used during training — TODO confirm
+ON_STEP = False  # not referenced in the visible code; presumably the matching on_step logging flag — TODO confirm
+BATCH_SIZE = 64  # not referenced in the visible code; presumably the training batch size — TODO confirm
+TARGET_SIZE = (64, 64)  # TARGET_SIZE[0] is the default img_size of PatchEmbed below
+SPLIT_RATE = 0.8  # not referenced in the visible code; presumably the train/validation split fraction — TODO confirm
+ROOT_DIR_DATA = "/kaggle/input/ant-data-new/data"  # not referenced in the visible code; Kaggle dataset path, presumably training-only
+class PatchEmbed(nn.Module):
+    """Image to Patch Embedding"""
+    def __init__(self, img_size=TARGET_SIZE[0], patch_size=4, in_chans=3, embed_dim=64):
+        super().__init__()
+        if isinstance(img_size, int):
+            img_size = img_size, img_size
+        if isinstance(patch_size, int):
+            patch_size = patch_size, patch_size
+        # calculate the number of patches
+        self.patch_shape = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])
+        # convolutional layer to convert the image into patches
+        self.conv = nn.Conv2d(
+            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size
+        )
+    def forward(self, x):
+        x = self.conv(x)
+        # flatten the patches
+        x = rearrange(x, 'b e h w -> b (h w) e')
+        return x
+class ViTIJEPA(nn.Module):
+    def __init__(self, img_size, patch_size, in_chans, embed_dim, enc_depth, num_heads,
+                 num_classes, post_emb_norm=False,
+                 layer_dropout=0.):
+        super().__init__()
+        self.layer_dropout = layer_dropout
+        self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
+        self.num_tokens = self.patch_embed.patch_shape[0] * self.patch_embed.patch_shape[1]
+        self.pos_embedding = nn.Parameter(torch.randn(1, self.num_tokens, embed_dim))
+        self.post_emb_norm = nn.LayerNorm(embed_dim) if post_emb_norm else nn.Identity()
+        self.student_encoder = Encoder(
+            dim=embed_dim,
+            heads=num_heads,
+            depth=enc_depth,
+            layer_dropout=self.layer_dropout,
+            flash=True
+        )
+        self.average_pool = nn.AvgPool1d((embed_dim), stride=1)
+        # mlp head
+        self.mlp_head = nn.Sequential(
+            nn.LayerNorm(self.num_tokens),
+            nn.Linear(self.num_tokens, num_classes),
+        )
+    def forward(self, x):
+        x = self.patch_embed(x)
+        b, n, e = x.shape
+        # add the positional embeddings
+        x = x + self.pos_embedding
+        # normalize the embeddings
+        x = self.post_emb_norm(x)
+        # if mode is test, we get return full embedding:
+        x = self.student_encoder(x)
+        x = self.average_pool(x)  # conduct average pool like in paper
+        x = x.squeeze(-1)
+        x = self.mlp_head(x)  # pass through mlp head
+        return x

app.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import gradio as gr
+from IJEPA_finetune import ViTIJEPA
+import torch
+from einops import rearrange
+from torchvision.transforms import Compose
+import torchvision
+classes = ['Acanthostichus',  # label names; list position i is the label for model output index i
+           'Aenictus',
+           'Amblyopone',
+           'Attini',
+           'Bothriomyrmecini',
+           'Camponotini',
+           'Cerapachys',
+           'Cheliomyrmex',
+           'Crematogastrini',
+           'Cylindromyrmex',
+           'Dolichoderini',
+           'Dorylus',
+           'Eciton',
+           'Ectatommini',
+           'Formicini',
+           'Fulakora',
+           'Gesomyrmecini',
+           'Gigantiopini',
+           'Heteroponerini',
+           'Labidus',
+           'Lasiini',
+           'Leptomyrmecini',
+           'Lioponera',
+           'Melophorini',
+           'Myopopone',
+           'Myrmecia',
+           'Myrmelachistini',
+           'Myrmicini',
+           'Myrmoteratini',
+           'Mystrium',
+           'Neivamyrmex',
+           'Nomamyrmex',
+           'Oecophyllini',
+           'Ooceraea',
+           'Paraponera',
+           'Parasyscia',
+           'Plagiolepidini',
+           'Platythyreini',
+           'Pogonomyrmecini',
+           'Ponerini',
+           'Prionopelta',
+           'Probolomyrmecini',
+           'Proceratiini',
+           'Pseudomyrmex',
+           'Solenopsidini',
+           'Stenammini',
+           'Stigmatomma',
+           'Syscia',
+           'Tapinomini',
+           'Tetraponera',
+           'Zasphinctus']
+class_to_idx = {idx: cls for idx, cls in enumerate(classes)}
+tf = Compose([torchvision.transforms.Resize((64, 64), antialias=True)])
+model = ViTIJEPA(64, 4, 3, 64, 8, 8, len(classes))
+model.load_state_dict(torch.load("vit_ijepa_ant_1.pt", map_location=torch.device('cpu')))
+def ant_genus_classification(image):
+    image = torch.Tensor(image)
+    image = image.unsqueeze(0)
+    image = rearrange(image, 'b h w c -> b c h w')
+    image = tf(image)
+    print(image.shape)
+    with torch.no_grad():
+        prediction = torch.nn.functional.softmax(model(image)[0], dim=0)
+        # print(prediction.tolist())
+        confidences = {class_to_idx[i]: float(prediction[i]) for i in range(len(classes))}
+    return confidences
+    # prediction = model(image)[0]
+    # prediction = prediction.tolist()
+    # print(prediction)
+    # return {
+    #     class_to_idx[i]: prediction[i] for i in range(len(prediction)) if prediction[i] > 0.01
+    # }
+demo = gr.Interface(fn=ant_genus_classification, inputs="image", outputs=gr.Label(num_top_classes=3))
+if __name__ == "__main__":
+    demo.launch(debug=True)

downstream-ant-epoch=82-val_loss=0.07.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:481ca7a89125f57b57dc11d4fa111294f670cfb6e8b1b183bc9eb6922fc87d81
+size 25619801

sample-cifar10-epoch=399-ant.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1895c1679dfccce8093531f650ad9b7fb888c8bd335e2b7e1d1eab2d6fc87bae
+size 33430443

vit_ijepa_ant_1.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d00f67fe6537693dfae4b35841643a54e893e3a21a5b91fd954e0718e1836982
+size 5438855