Jiaye Zou committed on
Commit 50318d8
1 Parent(s): f474bfd

update: gradio app with docker

Files changed (7)
  1. Dockerfile +30 -0
  2. README.md +2 -3
  3. app.py +62 -12
  4. config.yaml +36 -0
  5. get_weights.sh +9 -0
  6. mapper/utils/viz_2d.py +43 -13
  7. requirements.txt +23 -0
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-runtime
+
+ # Set working directory
+ WORKDIR /mapper
+
+ # Install dependencies
+ RUN apt-get update && apt-get install -y \
+     git \
+     wget \
+     unzip \
+     vim \
+     ffmpeg \
+     libsm6 \
+     libxext6
+
+ RUN pip install --no-cache-dir gradio[oauth]==4.36.1 "uvicorn>=0.14.0" spaces
+
+ COPY . /mapper
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Get Weights
+ RUN bash get_weights.sh
+
+ # Clear APT and pip cache
+ RUN apt-get clean && rm -rf /var/lib/apt/lists/* && rm -rf /tmp/pip-reqs
+
+ # Start the app
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -3,10 +3,9 @@ title: "Map It Anywhere (MIA): Empowering Bird’s Eye View Mapping using Large-
  emoji: 🌍
  colorFrom: green
  colorTo: blue
- sdk: gradio
- sdk_version: "4.36.1"
- app_file: app.py
+ sdk: docker
  pinned: true
+ app_port: 7860
  ---
  <p align="center">
  <h1 align="center">Map It Anywhere (MIA): Empowering Bird’s Eye View Mapping using Large-scale Public Data</h1>
app.py CHANGED
@@ -3,9 +3,14 @@ from matplotlib import pyplot as plt
  from mapper.utils.io import read_image
  from mapper.utils.exif import EXIF
  from mapper.utils.wrappers import Camera
+ from mapper.data.image import rectify_image, pad_image, resize_image
+ from mapper.utils.viz_2d import one_hot_argmax_to_rgb, plot_images
+ from mapper.module import GenericModule
  from perspective2d import PerspectiveFields
+ import torch
  import numpy as np
  from typing import Optional, Tuple
+ from omegaconf import OmegaConf
 
  description = """
  <h1 align="center">
@@ -24,6 +29,10 @@ Mapper generates birds-eye-view maps from first person view monocular images. Tr
  </p>
  """
 
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ cfg = OmegaConf.load("config.yaml")
+
  class ImageCalibrator(PerspectiveFields):
      def __init__(self, version: str = "Paramnet-360Cities-edina-centered"):
          super().__init__(version)
@@ -40,7 +49,6 @@ class ImageCalibrator(PerspectiveFields):
          _, focal_ratio = exif.extract_focal()
          if focal_ratio != 0:
              focal_length = focal_ratio * max(h, w)
-
          calib = self.inference(img_bgr=image_rgb[..., ::-1])
          roll_pitch = (calib["pred_roll"].item(), calib["pred_pitch"].item())
          if focal_length is None:
@@ -57,26 +65,67 @@
          )
          return roll_pitch, camera
 
- def run(input_img):
-     calibrator = ImageCalibrator().to("cuda")
+ def preprocess_pipeline(image, roll_pitch, camera):
+     image = torch.from_numpy(image).float() / 255
+     image = image.permute(2, 0, 1).to(device)
+     camera = camera.to(device)
+
+     image, valid = rectify_image(image, camera.float(), -roll_pitch[0], -roll_pitch[1])
+
+     roll_pitch *= 0
+
+     image, _, camera, valid = resize_image(
+         image=image,
+         size=512,
+         camera=camera,
+         fn=max,
+         valid=valid
+     )
+
+     image, valid, camera = pad_image(
+         image, 512, camera, valid
+     )
 
+     camera = torch.stack([camera])
+
+     return {
+         "image": image.unsqueeze(0).to(device),
+         "valid": valid.unsqueeze(0).to(device),
+         "camera": camera.float().to(device),
+     }
+
+
+ calibrator = ImageCalibrator().to(device)
+ model = GenericModule(cfg)
+ model = model.load_from_checkpoint("trained_weights/mapper-excl-ood.ckpt", strict=False, cfg=cfg)
+ model = model.to(device)
+ model = model.eval()
+
+ def run(input_img):
      image_path = input_img.name
 
      image = read_image(image_path)
-     image = image.to("cuda")
      with open(image_path, "rb") as fid:
          exif = EXIF(fid, lambda: image.shape[:2])
 
      gravity, camera = calibrator.run(image, exif=exif)
 
-     print(f"Gravity: {gravity}")
-     print(f"Camera: {camera._data}")
-
-     plt.imshow(image)
-     plt.axis('off')
+     data = preprocess_pipeline(image, gravity, camera)
+
+     res = model(data)
+
+     plot_images([image], pad=0., adaptive=True)
      fig1 = plt.gcf()
 
-     return fig1
+     prediction = res['output']
+     rgb_prediction = one_hot_argmax_to_rgb(prediction, 6).squeeze(0).permute(1, 2, 0).cpu().long().numpy()
+     valid = res['valid_bev'].squeeze(0)[..., :-1]
+     rgb_prediction[~valid.cpu().numpy()] = 255
+
+     plot_images([rgb_prediction], pad=0., adaptive=True)
+     fig2 = plt.gcf()
+
+     return fig1, fig2
 
  demo = gr.Interface(
      fn=run,
@@ -84,7 +133,8 @@ demo = gr.Interface(
          gr.File(file_types=["image"], label="Input Image")
      ],
      outputs=[
-         gr.Plot(label="Inputs", format="png")
+         gr.Plot(label="Inputs", format="png"),
+         gr.Plot(label="Outputs", format="png"),
      ],
      description=description,)
- demo.launch(share=True)
+ demo.launch(share=False, server_name="0.0.0.0")
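
Note: since the new `run` handler only reads `.name` from its argument and returns the two matplotlib figures built above, it can be smoke-tested without the Gradio UI. A minimal sketch, assuming the weights fetched by get_weights.sh are in place, using a hypothetical `examples/street.jpg` input path, and skipping `demo.launch` while testing:

    from types import SimpleNamespace

    # Any object with a .name attribute works; Gradio normally passes a file wrapper here.
    fig_input, fig_bev = run(SimpleNamespace(name="examples/street.jpg"))
    fig_bev.savefig("bev_prediction.png")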
config.yaml ADDED
@@ -0,0 +1,36 @@
+ model:
+   image_encoder:
+     backbone:
+       pretrained: true
+       frozen: true
+       output_dim: 128
+     name: feature_extractor_DPT
+   segmentation_head:
+     dropout_rate: 0.2
+   name: map_perception_net
+   num_classes: 6
+   latent_dim: 128
+   z_max: 50
+   x_max: 25
+   pixel_per_meter: 2
+   num_scale_bins: 32
+   loss:
+     num_classes: 6
+     xent_weight: 1.0
+     dice_weight: 1.0
+     focal_loss: false
+     focal_loss_gamma: 2.0
+     requires_frustrum: true
+     requires_flood_mask: false
+     class_weights:
+     - 1.00351229
+     - 4.34782609
+     - 1.00110121
+     - 1.03124678
+     - 6.69792364
+     - 7.55857899
+     label_smoothing: 0.1
+   scale_range:
+   - 0
+   - 9
+   z_min: null
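
Note: a quick way to sanity-check the new config is to load it with OmegaConf, as app.py does. A sketch only, assuming the nesting shown above; the `z_max` override is purely illustrative:

    from omegaconf import OmegaConf

    cfg = OmegaConf.load("config.yaml")
    print(cfg.model.num_classes)         # 6
    print(cfg.model.loss.class_weights)  # the six per-class weights listed above
    cfg.model.z_max = 40                 # fields can be overridden before building GenericModule(cfg)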
get_weights.sh ADDED
@@ -0,0 +1,9 @@
+ #!/bin/bash
+
+ # URL of the file to download
+ ood_weights="https://huggingface.co/mapitanywhere/mapper/resolve/main/weights/mapper-excl-ood/model.ckpt"
+
+ mkdir -p trained_weights
+
+ # Download the file using wget
+ wget $ood_weights -O trained_weights/mapper-excl-ood.ckpt
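
Note: if the huggingface_hub client is installed, the same checkpoint can be fetched without wget. A sketch only, with the repo id and filename derived from the URL above:

    import os, shutil
    from huggingface_hub import hf_hub_download

    path = hf_hub_download(
        repo_id="mapitanywhere/mapper",
        filename="weights/mapper-excl-ood/model.ckpt",
    )
    os.makedirs("trained_weights", exist_ok=True)
    shutil.copy(path, "trained_weights/mapper-excl-ood.ckpt")  # location app.py expects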
mapper/utils/viz_2d.py CHANGED
@@ -6,6 +6,7 @@
 
  import numpy as np
  import torch
+ import matplotlib.pyplot as plt
 
 
  def features_to_RGB(*Fs, masks=None, skip=1):
@@ -69,22 +70,18 @@ def one_hot_argmax_to_rgb(y, num_class):
 
      '''
 
-
      class_colors = {
-         'road': (0, 0, 0),                       # 0: Black
-         'crossing': (255, 0, 0),                 # 1; Red
-         'explicit_pedestrian': (255, 255, 0),    # 2: Yellow
+         'road': (68, 68, 68),                    # 0: Black
+         'crossing': (244, 162, 97),              # 1; Red
+         'explicit_pedestrian': (233, 196, 106),  # 2: Yellow
          # 'explicit_void': (128, 128, 128),      # 3: White
-         'park': (0, 255, 0),                     # 4: Green
-         'building': (255, 0, 255),               # 5: Magenta
-         'water': (0, 0, 255),                    # 6: Blue
-         'terrain': (0, 255, 255),                # 7: Cyan
-         'parking': (170, 170, 170),              # 8: Dark Grey
-         'train': (85, 85, 85),                   # 9: Light Grey
-         'predicted_void': (256, 256, 256)
+         'building': (231, 111, 81),              # 5: Magenta
+         'terrain': (42, 157, 143),               # 7: Cyan
+         'parking': (204, 204, 204),              # 8: Dark Grey
+         'predicted_void': (255, 255, 255)
      }
      class_colors = class_colors.values()
-     class_colors = [torch.tensor(x) for x in class_colors]
+     class_colors = [torch.tensor(x).float() for x in class_colors]
 
      argmaxed = torch.argmax((y > 0.5).float(), dim=1)  # Take argmax
      argmaxed[torch.all(y <= 0.5, dim=1)] = num_class
@@ -97,10 +94,43 @@ def one_hot_argmax_to_rgb(y, num_class):
              argmaxed.shape[1],
              argmaxed.shape[2],
          )
-     ) * 256
+     ) * 255
      for i in range(num_class + 1):
          seg_rgb[:, 0, :, :][argmaxed == i] = class_colors[i][0]
          seg_rgb[:, 1, :, :][argmaxed == i] = class_colors[i][1]
          seg_rgb[:, 2, :, :][argmaxed == i] = class_colors[i][2]
 
      return seg_rgb
+
+ def plot_images(imgs, titles=None, cmaps="gray", dpi=100, pad=0.5, adaptive=True):
+     """Plot a set of images horizontally.
+     Args:
+         imgs: a list of NumPy or PyTorch images, RGB (H, W, 3) or mono (H, W).
+         titles: a list of strings, as titles for each image.
+         cmaps: colormaps for monochrome images.
+         adaptive: whether the figure size should fit the image aspect ratios.
+     """
+     n = len(imgs)
+     if not isinstance(cmaps, (list, tuple)):
+         cmaps = [cmaps] * n
+
+     if adaptive:
+         ratios = [i.shape[1] / i.shape[0] for i in imgs]  # W / H
+     else:
+         ratios = [4 / 3] * n
+     figsize = [sum(ratios) * 4.5, 4.5]
+     fig, ax = plt.subplots(
+         1, n, figsize=figsize, dpi=dpi, gridspec_kw={"width_ratios": ratios}
+     )
+     if n == 1:
+         ax = [ax]
+     for i in range(n):
+         ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i]))
+         ax[i].get_yaxis().set_ticks([])
+         ax[i].get_xaxis().set_ticks([])
+         ax[i].set_axis_off()
+         for spine in ax[i].spines.values():  # remove frame
+             spine.set_visible(False)
+         if titles:
+             ax[i].set_title(titles[i])
+     fig.tight_layout(pad=pad)
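
Note: the two helpers can be exercised together on a random tensor to preview the new palette; a sketch only, with shapes mirroring how app.py calls them:

    import torch
    import matplotlib.pyplot as plt
    from mapper.utils.viz_2d import one_hot_argmax_to_rgb, plot_images

    probs = torch.rand(1, 6, 64, 64)  # fake (B, C, H, W) class scores, 6 classes
    rgb = one_hot_argmax_to_rgb(probs, 6).squeeze(0).permute(1, 2, 0).to(torch.uint8).numpy()
    plot_images([rgb], titles=["palette check"], pad=0., adaptive=True)
    plt.gcf().savefig("palette_check.png")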
requirements.txt ADDED
@@ -0,0 +1,23 @@
+ torch
+ torchvision
+ numpy
+ opencv-python
+ Pillow
+ tqdm>=4.36.0
+ matplotlib
+ plotly
+ scipy
+ omegaconf
+ pytorch-lightning
+ torchmetrics
+ lxml
+ rtree
+ scikit-learn
+ geopy
+ exifread
+ hydra-core
+ umsgpack
+ nuscenes-devkit
+ perspective2d @ git+https://github.com/jinlinyi/PerspectiveFields.git
+ urllib3>=2
+ wandb