ibm-nasa-geospatial
/

Prithvi-100M

Model card Files Files and versions Community

Paolo-Fraccaro commited on Jul 24, 2023

Commit

b8e0a76

•

1 Parent(s): c70cc56

Update Prithvi.py

Files changed (1) hide show

Prithvi.py +31 -3

Prithvi.py CHANGED Viewed

@@ -15,12 +15,42 @@ import torch
 import torch.nn as nn
 from timm.models.vision_transformer import Block
-from timm.models.layers import to_2tuple, _assert
 import numpy as np
 from einops import rearrange
 def get_3d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
     """
     grid_size: 3d tuple of grid size: t, h, w
@@ -85,8 +115,6 @@ class PatchEmbed(nn.Module):
     def forward(self, x):
         B, C, T, H, W = x.shape
-        _assert(H == self.img_size[0], f"Input image height ({H}) doesn't match model ({self.img_size[0]}).")
-        _assert(W == self.img_size[1], f"Input image width ({W}) doesn't match model ({self.img_size[1]}).")
         x = self.proj(x)
         if self.flatten:
             x = x.flatten(2).transpose(1, 2)  # B,C,T,H,W -> B,C,L -> B,L,C

 import torch.nn as nn
 from timm.models.vision_transformer import Block
+from timm.models.layers import to_2tuple
 import numpy as np
 from einops import rearrange
+def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
+    """
+    embed_dim: output dimension for each position
+    pos: a list of positions to be encoded: size (M,)
+    out: (M, D)
+    """
+    assert embed_dim % 2 == 0
+    omega = np.arange(embed_dim // 2, dtype=np.float32)
+    omega /= embed_dim / 2.
+    omega = 1. / 10000**omega  # (D/2,)
+    pos = pos.reshape(-1)  # (M,)
+    out = np.einsum('m,d->md', pos, omega)  # (M, D/2), outer product
+    emb_sin = np.sin(out) # (M, D/2)
+    emb_cos = np.cos(out) # (M, D/2)
+    emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
+    return emb
+def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
+    assert embed_dim % 2 == 0
+    # use half of dimensions to encode grid_h
+    emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0])  # (H*W, D/2)
+    emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1])  # (H*W, D/2)
+    emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D)
+    return emb
 def get_3d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
     """
     grid_size: 3d tuple of grid size: t, h, w
     def forward(self, x):
         B, C, T, H, W = x.shape
         x = self.proj(x)
         if self.flatten:
             x = x.flatten(2).transpose(1, 2)  # B,C,T,H,W -> B,C,L -> B,L,C