import os os.system("pip uninstall -y mmcv-full") os.system("pip uninstall -y mmsegmentation") os.system("pip install ./mmcv_full-1.5.0-cp310-cp310-linux_x86_64.whl") os.system("pip install -r requirements-extras.txt") # os.system("cp /home/user/data/dinov2_vitg14_ade20k_m2f.pth /home/user/.cache/torch/hub/checkpoints/dinov2_vitg14_ade20k_m2f.pth") import gradio as gr import base64 import cv2 import math import itertools from functools import partial from PIL import Image import numpy as np import pandas as pd import dinov2.eval.segmentation.utils.colormaps as colormaps import torch import torch.nn.functional as F from mmseg.apis import init_segmentor, inference_segmentor import dinov2.eval.segmentation.models import dinov2.eval.segmentation_m2f.models.segmentors import urllib import mmcv from mmcv.runner import load_checkpoint model = None model_loaded = False DINOV2_BASE_URL = "https://dl.fbaipublicfiles.com/dinov2" CONFIG_URL = f"{DINOV2_BASE_URL}/dinov2_vitg14/dinov2_vitg14_ade20k_m2f_config.py" CHECKPOINT_URL = f"{DINOV2_BASE_URL}/dinov2_vitg14/dinov2_vitg14_ade20k_m2f.pth" def load_config_from_url(url: str) -> str: with urllib.request.urlopen(url) as f: return f.read().decode() cfg_str = load_config_from_url(CONFIG_URL) cfg = mmcv.Config.fromstring(cfg_str, file_format=".py") DATASET_COLORMAPS = { "ade20k": colormaps.ADE20K_COLORMAP, "voc2012": colormaps.VOC2012_COLORMAP, } colormap = DATASET_COLORMAPS["ade20k"] flattened = np.array(colormap).flatten() zeros = np.zeros(768) zeros[:flattened.shape[0]] = flattened colorMap = list(zeros.astype('uint8')) model = init_segmentor(cfg) load_checkpoint(model, CHECKPOINT_URL, map_location="cpu") model.cuda() model.eval() class CenterPadding(torch.nn.Module): def __init__(self, multiple): super().__init__() self.multiple = multiple def _get_pad(self, size): new_size = math.ceil(size / self.multiple) * self.multiple pad_size = new_size - size pad_size_left = pad_size // 2 pad_size_right = pad_size - pad_size_left return pad_size_left, pad_size_right @torch.inference_mode() def forward(self, x): pads = list(itertools.chain.from_iterable(self._get_pad(m) for m in x.shape[:1:-1])) output = F.pad(x, pads) return output def create_segmenter(cfg, backbone_model): model = init_segmentor(cfg) model.backbone.forward = partial( backbone_model.get_intermediate_layers, n=cfg.model.backbone.out_indices, reshape=True, ) if hasattr(backbone_model, "patch_size"): model.backbone.register_forward_pre_hook(lambda _, x: CenterPadding(backbone_model.patch_size)(x[0])) model.init_weights() return model def render_segmentation(segmentation_logits, dataset): colormap_array = np.array(colormap, dtype=np.uint8) segmentation_logits += 1 segmented_image = Image.fromarray(segmentation_logits) segmented_image.putpalette(colorMap) unique_labels = np.unique(segmentation_logits) colormap_array = colormap_array[unique_labels] df = pd.read_csv("labelmap.txt", sep="\t") html_output = '
' import matplotlib.pyplot as plt for idx, color in enumerate(colormap_array): color_box = np.zeros((20, 20, 3), dtype=np.uint8) color_box[:, :] = color color_box = cv2.cvtColor(color_box, cv2.COLOR_RGB2BGR) _, img_data = cv2.imencode(".jpg", color_box) img_base64 = base64.b64encode(img_data).decode("utf-8") img_data_uri = f"data:image/jpg;base64,{img_base64}" html_output += f'

{df.iloc[unique_labels[idx]-1]["Name"]}

' html_output += "
" return segmented_image, html_output def predict(image_file): array = np.array(image_file)[:, :, ::-1] # BGR segmentation_logits = inference_segmentor(model, array)[0] segmentation_logits = segmentation_logits.astype(np.uint8) segmented_image, html_output = render_segmentation(segmentation_logits, "ade20k") return segmented_image, html_output description = "Gradio demo for Semantic segmentation. To use it, simply upload your image" demo = gr.Interface( title="Semantic Segmentation - DinoV2", fn=predict, inputs=gr.inputs.Image(), outputs=[gr.outputs.Image(type="pil"), gr.outputs.HTML()], examples=["example_1.jpg", "example_2.jpg"], cache_examples=False, description=description, ) demo.launch()