ibm-nasa-geospatial
/

Prithvi-EO-1.0-100M

Transformers

Pytorch

Geospatial

Temporal ViT

Vit

Inference Endpoints

Model card Files Files and versions Community

Paolo-Fraccaro committed on Jul 30, 2023

Commit

9bb6f80

1 Parent(s): a7f8c2e

add rgb output option

Browse files

Files changed (1) hide show

Prithvi_run_inference.py +76 -19

Prithvi_run_inference.py CHANGED Viewed

@@ -9,7 +9,7 @@ import torch
 import yaml
 from einops import rearrange
-from Prithvi import MaskedAutoencoderViT
 NO_DATA = -9999
@@ -21,12 +21,14 @@ def process_channel_group(orig_img, new_img, channels, data_mean, data_std):
     """ Process *orig_img* and *new_img* for RGB visualization. Each band is rescaled back to the
         original range using *data_mean* and *data_std* and then lowest and highest percentiles are
         removed to enhance contrast. Data is rescaled to (0, 1) range and stacked channels_first.
     Args:
         orig_img: torch.Tensor representing original image (reference) with shape = (bands, H, W).
         new_img: torch.Tensor representing image with shape = (bands, H, W).
         channels: list of indices representing RGB channels.
         data_mean: list of mean values for each band.
         data_std: list of std values for each band.
     Returns:
         torch.Tensor with shape (num_channels, height, width) for original image
         torch.Tensor with shape (num_channels, height, width) for the other image
@@ -37,7 +39,7 @@ def process_channel_group(orig_img, new_img, channels, data_mean, data_std):
     for c in channels:
         orig_ch = orig_img[c, ...]
         valid_mask = torch.ones_like(orig_ch, dtype=torch.bool)
-        valid_mask[orig_ch == 0.0001] = False
         # Back to original data range
         orig_ch = (orig_ch * data_std[c]) + data_mean[c]
@@ -64,9 +66,11 @@ def process_channel_group(orig_img, new_img, channels, data_mean, data_std):
 def read_geotiff(file_path: str):
-    """ Read all bands from *file_path* and returns image + meta info.
     Args:
         file_path: path to image file.
     Returns:
         np.ndarray with shape (bands, height, width)
         meta info dict
@@ -81,6 +85,7 @@ def read_geotiff(file_path: str):
 def save_geotiff(image, output_path: str, meta: dict):
     """ Save multi-band image in Geotiff file.
     Args:
         image: np.ndarray with shape (bands, height, width)
         output_path: path where to save the image
@@ -104,10 +109,12 @@ def _convert_np_uint8(float_image: torch.Tensor):
 def load_example(file_paths: List[str], mean: List[float], std: List[float]):
     """ Build an input example by loading images in *file_paths*.
     Args:
         file_paths: list of file paths .
         mean: list containing mean values for each band in the images in *file_paths*.
         std: list containing std values for each band in the images in *file_paths*.
     Returns:
         np.array containing created example
         list of meta info for each image in *file_paths*
@@ -126,8 +133,8 @@ def load_example(file_paths: List[str], mean: List[float], std: List[float]):
         imgs.append(img)
         metas.append(meta)
-    imgs = np.stack(imgs, axis=0)    # num_frames, img_size, img_size, C
-    imgs = np.moveaxis(imgs, -1, 0).astype('float32')  # C, num_frames, img_size, img_size
     imgs = np.expand_dims(imgs, axis=0)  # add batch dim
     return imgs, metas
@@ -135,11 +142,13 @@ def load_example(file_paths: List[str], mean: List[float], std: List[float]):
 def run_model(model: torch.nn.Module, input_data: torch.Tensor, mask_ratio: float, device: torch.device):
     """ Run *model* with *input_data* and create images from output tokens (mask, reconstructed + visible).
     Args:
         model: MAE model to run.
         input_data: torch.Tensor with shape (B, C, T, H, W).
         mask_ratio: mask ratio to use.
         device: device where model should run.
     Returns:
         3 torch.Tensor with shape (B, C, T, H, W).
     """
@@ -165,6 +174,7 @@ def run_model(model: torch.nn.Module, input_data: torch.Tensor, mask_ratio: floa
 def save_rgb_imgs(input_img, rec_img, mask_img, channels, mean, std, output_dir, meta_data):
     """ Wrapper function to save Geotiff images (original, reconstructed, masked) per timestamp.
     Args:
         input_img: input torch.Tensor with shape (C, T, H, W).
         rec_img: reconstructed torch.Tensor with shape (C, T, H, W).
@@ -199,7 +209,41 @@ def save_rgb_imgs(input_img, rec_img, mask_img, channels, mean, std, output_dir,
                      meta=meta_data[t])
-def main(data_files: List[str], yaml_file_path: str, checkpoint: str, output_dir: str, mask_ratio: float):
     os.makedirs(output_dir, exist_ok=True)
@@ -262,7 +306,7 @@ def main(data_files: List[str], yaml_file_path: str, checkpoint: str, output_dir
             norm_pix_loss=False)
     total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
-    print(f"\n--> model has {total_params / 1e6} Million params.\n")
     model.to(device)
@@ -275,6 +319,12 @@ def main(data_files: List[str], yaml_file_path: str, checkpoint: str, output_dir
     model.eval()
     channels = [bands.index(b) for b in ['B04', 'B03', 'B02']]  # BGR -> RGB
     # Build sliding window
     batch = torch.tensor(input_data, device='cpu')
     windows = batch.unfold(3, img_size, img_size).unfold(4, img_size, img_size)
@@ -302,20 +352,23 @@ def main(data_files: List[str], yaml_file_path: str, checkpoint: str, output_dir
     mask_imgs = rearrange(mask_imgs, '(b h1 w1) c t h w -> b c t (h1 h) (w1 w)',
                           h=img_size, w=img_size, b=1, c=len(bands), t=num_frames, h1=h1, w1=w1)
-    # Mix original image with patches
-    h, w = rec_imgs.shape[-2:]
-    rec_imgs_full = batch.clone()
-    rec_imgs_full[..., :h, :w] = rec_imgs
-    mask_imgs_full = torch.ones_like(batch)
-    mask_imgs_full[..., :h, :w] = mask_imgs
-    # Build RGB images
-    for d in meta_data:
-        d.update(count=3, dtype='uint8', compress='lzw', nodata=0)
-    save_rgb_imgs(batch[0, ...], rec_imgs_full[0, ...], mask_imgs_full[0, ...],
-                  channels, mean, std, output_dir, meta_data)
     print("Done!")
@@ -334,6 +387,10 @@ if __name__ == "__main__":
     parser.add_argument('--mask_ratio', default=None, type=float,
                         help='Masking ratio (percentage of removed patches). '
                              'If None (default) use same value used for pretraining.')
     args = parser.parse_args()
-    main(**vars(args))

 import yaml
 from einops import rearrange
+from mae.models_mae import MaskedAutoencoderViT
 NO_DATA = -9999
     """ Process *orig_img* and *new_img* for RGB visualization. Each band is rescaled back to the
         original range using *data_mean* and *data_std* and then lowest and highest percentiles are
         removed to enhance contrast. Data is rescaled to (0, 1) range and stacked channels_first.
     Args:
         orig_img: torch.Tensor representing original image (reference) with shape = (bands, H, W).
         new_img: torch.Tensor representing image with shape = (bands, H, W).
         channels: list of indices representing RGB channels.
         data_mean: list of mean values for each band.
         data_std: list of std values for each band.
     Returns:
         torch.Tensor with shape (num_channels, height, width) for original image
         torch.Tensor with shape (num_channels, height, width) for the other image
     for c in channels:
         orig_ch = orig_img[c, ...]
         valid_mask = torch.ones_like(orig_ch, dtype=torch.bool)
+        valid_mask[orig_ch == NO_DATA_FLOAT] = False
         # Back to original data range
         orig_ch = (orig_ch * data_std[c]) + data_mean[c]
 def read_geotiff(file_path: str):
+    """ Read all bands from *file_path* and return image + meta info.
     Args:
         file_path: path to image file.
     Returns:
         np.ndarray with shape (bands, height, width)
         meta info dict
 def save_geotiff(image, output_path: str, meta: dict):
     """ Save multi-band image in Geotiff file.
     Args:
         image: np.ndarray with shape (bands, height, width)
         output_path: path where to save the image
 def load_example(file_paths: List[str], mean: List[float], std: List[float]):
     """ Build an input example by loading images in *file_paths*.
     Args:
         file_paths: list of file paths .
         mean: list containing mean values for each band in the images in *file_paths*.
         std: list containing std values for each band in the images in *file_paths*.
     Returns:
         np.array containing created example
         list of meta info for each image in *file_paths*
         imgs.append(img)
         metas.append(meta)
+    imgs = np.stack(imgs, axis=0)    # num_frames, H, W, C
+    imgs = np.moveaxis(imgs, -1, 0).astype('float32')  # C, num_frames, H, W
     imgs = np.expand_dims(imgs, axis=0)  # add batch dim
     return imgs, metas
 def run_model(model: torch.nn.Module, input_data: torch.Tensor, mask_ratio: float, device: torch.device):
     """ Run *model* with *input_data* and create images from output tokens (mask, reconstructed + visible).
     Args:
         model: MAE model to run.
         input_data: torch.Tensor with shape (B, C, T, H, W).
         mask_ratio: mask ratio to use.
         device: device where model should run.
     Returns:
         3 torch.Tensor with shape (B, C, T, H, W).
     """
 def save_rgb_imgs(input_img, rec_img, mask_img, channels, mean, std, output_dir, meta_data):
     """ Wrapper function to save Geotiff images (original, reconstructed, masked) per timestamp.
     Args:
         input_img: input torch.Tensor with shape (C, T, H, W).
         rec_img: reconstructed torch.Tensor with shape (C, T, H, W).
                      meta=meta_data[t])
+def save_imgs(rec_img, mask_img, mean, std, output_dir, meta_data):
+    """ Wrapper function to save Geotiff images (reconstructed, mask) per timestamp.
+    Args:
+        rec_img: reconstructed torch.Tensor with shape (C, T, H, W).
+        mask_img: mask torch.Tensor with shape (C, T, H, W).
+        mean: list of mean values for each band.
+        std: list of std values for each band.
+        output_dir: directory where to save outputs.
+        meta_data: list of dicts with geotiff meta info.
+    """
+    mean = torch.tensor(np.asarray(mean)[:, None, None])  # C H W
+    std = torch.tensor(np.asarray(std)[:, None, None])
+    for t in range(rec_img.shape[1]):
+        # Back to original data range
+        rec_img_t = ((rec_img[:, t, :, :] * std) + mean).to(torch.int16)
+        mask_img_t = mask_img[:, t, :, :].to(torch.int16)
+        # Saving images
+        save_geotiff(image=rec_img_t,
+                     output_path=os.path.join(output_dir, f"predicted_t{t}.tiff"),
+                     meta=meta_data[t])
+        save_geotiff(image=mask_img_t,
+                     output_path=os.path.join(output_dir, f"mask_t{t}.tiff"),
+                     meta=meta_data[t])
+def main(data_files: List[str], yaml_file_path: str, checkpoint: str, output_dir: str,
+         mask_ratio: float, rgb_outputs: bool):
     os.makedirs(output_dir, exist_ok=True)
             norm_pix_loss=False)
     total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    print(f"\n--> Model has {total_params:,} parameters.\n")
     model.to(device)
     model.eval()
     channels = [bands.index(b) for b in ['B04', 'B03', 'B02']]  # BGR -> RGB
+    # Reflect pad if not divisible by img_size
+    original_h, original_w = input_data.shape[-2:]
+    pad_h = img_size - (original_h % img_size)
+    pad_w = img_size - (original_w % img_size)
+    input_data = np.pad(input_data, ((0, 0), (0, 0), (0, 0), (0, pad_h), (0, pad_w)), mode='reflect')
     # Build sliding window
     batch = torch.tensor(input_data, device='cpu')
     windows = batch.unfold(3, img_size, img_size).unfold(4, img_size, img_size)
     mask_imgs = rearrange(mask_imgs, '(b h1 w1) c t h w -> b c t (h1 h) (w1 w)',
                           h=img_size, w=img_size, b=1, c=len(bands), t=num_frames, h1=h1, w1=w1)
+    # Cut padded images back to original size
+    rec_imgs_full = rec_imgs[..., :original_h, :original_w]
+    mask_imgs_full = mask_imgs[..., :original_h, :original_w]
+    batch_full = batch[..., :original_h, :original_w]
+    # Build output images
+    if rgb_outputs:
+        for d in meta_data:
+            d.update(count=3, dtype='uint8', compress='lzw', nodata=0)
+        save_rgb_imgs(batch_full[0, ...], rec_imgs_full[0, ...], mask_imgs_full[0, ...],
+                      channels, mean, std, output_dir, meta_data)
+    else:
+        for d in meta_data:
+            d.update(compress='lzw', nodata=0)
+        save_imgs(rec_imgs_full[0, ...], mask_imgs_full[0, ...], mean, std, output_dir, meta_data)
     print("Done!")
     parser.add_argument('--mask_ratio', default=None, type=float,
                         help='Masking ratio (percentage of removed patches). '
                              'If None (default) use same value used for pretraining.')
+    parser.add_argument('--rgb_outputs', action='store_true',
+                        help='If present, output files will only contain RGB channels. '
+                             'Otherwise, all bands will be saved.')
     args = parser.parse_args()
+    main(**vars(args))