Nunzio committed on
Commit
6a0b93e
·
1 Parent(s): bdf4b96

added files

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv/
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, torch, torchvision
2
+ import torchvision.transforms.functional
3
+ from model.BiSeNet.build_bisenet import BiSeNet
4
+ import gradio as gr
5
+ from utils.imageHandling import hfImageToTensor, preprocessing
6
+
7
+
8
+
9
+
10
+
11
# %% prediction on an image

def predict(inputImage: torch.Tensor, model: BiSeNet) -> torch.Tensor:
    """
    Run the segmentation model on a single image and return the class mask.

    Args:
        inputImage (torch.Tensor): The input image tensor (3, H, W) — see preprocessing.
        model (BiSeNet): The BiSeNet model for segmentation.

    Returns:
        torch.Tensor: The predicted segmentation mask (1 channel, class index per pixel).
    """
    with torch.no_grad():
        raw = model(preprocessing(inputImage))
    # Training-mode BiSeNet returns a (result, aux1, aux2) tuple; keep the main head.
    logits = raw[0] if isinstance(raw, (tuple, list)) else raw
    # Drop the batch dimension, then take the highest-scoring class per pixel.
    return logits[0].argmax(dim=0, keepdim=True)
28
+
29
+
30
+
31
# %% load model

def loadModel(model: str = 'bisenet', device: str = 'cpu') -> BiSeNet:
    """
    Load the specified model and move it to the given device.

    Args:
        model (str): model to be loaded.
        device (str): Device to load the model onto ('cpu' or 'cuda').

    Returns:
        model (BiSeNet): The loaded BiSeNet model.

    Raises:
        NotImplementedError: if the requested model name is not supported.
    """
    # Normalize the name only when it is actually a string.
    requested = model.lower() if isinstance(model, str) else model
    if requested == 'bisenet':
        return loadBiSeNet(device)
    raise NotImplementedError(f"Model {model} is not implemented. Please choose 'bisenet' .")
49
+
50
+
51
# BiSeNet model loading function
def loadBiSeNet(device: str = 'cpu') -> BiSeNet:
    """
    Load the BiSeNet model with pretrained weights and move it to the specified device.

    Args:
        device (str): Device to load the model onto ('cpu' or 'cuda').

    Returns:
        model (BiSeNet): The loaded BiSeNet model, set to eval mode.
    """
    # BUG FIX: the BiSeNet constructor parameter is `num_classes`, not `n_classes`;
    # the original keyword raised TypeError at call time.
    model = BiSeNet(num_classes=19, context_path='resnet18').to(device)
    model.load_state_dict(torch.load('./weights/BiSeNet/weightADV.pth', map_location=device))
    model.eval()

    return model
model/BiSeNet/build_bisenet.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ from .build_contextpath import build_contextpath
4
+ import warnings
5
+ warnings.filterwarnings(action='ignore')
6
+
7
+
8
class ConvBlock(torch.nn.Module):
    """Conv2d (no bias) -> BatchNorm2d -> ReLU building block."""

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=2, padding=1):
        super().__init__()
        # Bias is omitted because the batch norm that follows has its own affine shift.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                               stride=stride, padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, input):
        normalized = self.bn(self.conv1(input))
        return self.relu(normalized)
19
+
20
+
21
class Spatial_path(torch.nn.Module):
    """Three stride-2 ConvBlocks: 3 -> 64 -> 128 -> 256 channels, downsampling by 8x."""

    def __init__(self):
        super().__init__()
        self.convblock1 = ConvBlock(in_channels=3, out_channels=64)
        self.convblock2 = ConvBlock(in_channels=64, out_channels=128)
        self.convblock3 = ConvBlock(in_channels=128, out_channels=256)

    def forward(self, input):
        out = input
        # Apply the three downsampling blocks in sequence.
        for block in (self.convblock1, self.convblock2, self.convblock3):
            out = block(out)
        return out
33
+
34
+
35
class AttentionRefinementModule(torch.nn.Module):
    """Channel attention: scale the input by sigmoid(BN(1x1 conv(global-avg-pool(input))))."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.sigmoid = nn.Sigmoid()
        self.in_channels = in_channels
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

    def forward(self, input):
        # Squeeze spatial dimensions to 1x1 via global average pooling.
        pooled = self.avgpool(input)
        assert self.in_channels == pooled.size(1), 'in_channels and out_channels should all be {}'.format(pooled.size(1))
        # Per-channel attention weights in (0, 1).
        gate = self.sigmoid(self.bn(self.conv(pooled)))
        # Re-weight the input channels; the 1x1 gate broadcasts over H and W.
        return input * gate
54
+
55
+
56
class FeatureFusionModule(torch.nn.Module):
    """Fuse spatial-path and context-path features with a squeeze-and-excitation style gate."""

    def __init__(self, num_classes, in_channels):
        super().__init__()
        # Expected concatenated channel count:
        #   resnet101: 3328 = 256 (spatial) + 1024 + 2048 (context)
        #   resnet18:  1024 = 256 (spatial) + 256 + 512 (context)
        self.in_channels = in_channels

        self.convblock = ConvBlock(in_channels=self.in_channels, out_channels=num_classes, stride=1)
        self.conv1 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
        self.sigmoid = nn.Sigmoid()
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

    def forward(self, input_1, input_2):
        merged = torch.cat((input_1, input_2), dim=1)
        assert self.in_channels == merged.size(1), 'in_channels of ConvBlock should be {}'.format(merged.size(1))
        feature = self.convblock(merged)

        # Build a per-channel gate from the globally pooled fused feature.
        gate = self.avgpool(feature)
        gate = self.relu(self.conv1(gate))
        gate = self.sigmoid(self.conv2(gate))

        # Residual combination: gated feature plus the feature itself.
        return feature * gate + feature
82
+
83
+
84
class BiSeNet(torch.nn.Module):
    """
    Bilateral Segmentation Network: a shallow spatial path preserves detail while
    a ResNet context path provides semantics; the two are fused into per-pixel
    class scores.
    """

    def __init__(self, num_classes, context_path):
        super().__init__()
        # Build spatial path. NOTE(review): the attribute keeps the historical
        # 'saptial_path' typo so existing checkpoints' state_dict keys still match.
        self.saptial_path = Spatial_path()

        # build context path (ResNet backbone wrapper)
        self.context_path = build_contextpath(name=context_path)

        if context_path == 'resnet101':
            # attention refinement modules for the two deepest feature maps
            self.attention_refinement_module1 = AttentionRefinementModule(1024, 1024)
            self.attention_refinement_module2 = AttentionRefinementModule(2048, 2048)
            # supervision blocks: training-time auxiliary heads
            self.supervision1 = nn.Conv2d(in_channels=1024, out_channels=num_classes, kernel_size=1)
            self.supervision2 = nn.Conv2d(in_channels=2048, out_channels=num_classes, kernel_size=1)
            # feature fusion module (256 spatial + 1024 + 2048 context channels)
            self.feature_fusion_module = FeatureFusionModule(num_classes, 3328)

        elif context_path == 'resnet18':
            # attention refinement modules for resnet18 feature maps
            self.attention_refinement_module1 = AttentionRefinementModule(256, 256)
            self.attention_refinement_module2 = AttentionRefinementModule(512, 512)
            # supervision blocks: training-time auxiliary heads
            self.supervision1 = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=1)
            self.supervision2 = nn.Conv2d(in_channels=512, out_channels=num_classes, kernel_size=1)
            # feature fusion module (256 spatial + 256 + 512 context channels)
            self.feature_fusion_module = FeatureFusionModule(num_classes, 1024)
        else:
            # BUG FIX: the original only printed a (misspelled) warning and then
            # crashed later with AttributeError; fail fast with a clear error.
            raise ValueError(
                f"Unsupported context_path network: {context_path!r}; choose 'resnet18' or 'resnet101'."
            )

        # final 1x1 convolution over the upsampled fused features
        self.conv = nn.Conv2d(in_channels=num_classes, out_channels=num_classes, kernel_size=1)

        self.init_weight()

        # modules eligible for a multiplied learning rate (everything except the
        # pretrained context-path backbone)
        self.mul_lr = [
            self.saptial_path,
            self.attention_refinement_module1,
            self.attention_refinement_module2,
            self.supervision1,
            self.supervision2,
            self.feature_fusion_module,
            self.conv,
        ]

    def init_weight(self):
        """Kaiming-initialize convs and reset batch norms, skipping the pretrained context path."""
        for name, m in self.named_modules():
            if 'context_path' not in name:
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                elif isinstance(m, nn.BatchNorm2d):
                    m.eps = 1e-5
                    m.momentum = 0.1
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)

    def forward(self, input):
        """
        Args:
            input (torch.Tensor): batch of images, shape (N, 3, H, W).

        Returns:
            torch.Tensor: per-pixel class scores in eval mode; in training mode
            a tuple (result, cx1_sup, cx2_sup) including the two auxiliary
            supervision outputs.
        """
        # output of spatial path (downsampled 8x relative to input)
        sx = self.saptial_path(input)

        # output of context path: 1/16 and 1/32 feature maps plus pooled tail
        cx1, cx2, tail = self.context_path(input)
        cx1 = self.attention_refinement_module1(cx1)
        cx2 = self.attention_refinement_module2(cx2)
        cx2 = torch.mul(cx2, tail)
        # upsample both context features to the spatial-path resolution
        cx1 = torch.nn.functional.interpolate(cx1, size=sx.size()[-2:], mode='bilinear')
        cx2 = torch.nn.functional.interpolate(cx2, size=sx.size()[-2:], mode='bilinear')
        cx = torch.cat((cx1, cx2), dim=1)

        if self.training:
            # auxiliary supervision heads, upsampled to input resolution
            cx1_sup = self.supervision1(cx1)
            cx2_sup = self.supervision2(cx2)
            cx1_sup = torch.nn.functional.interpolate(cx1_sup, size=input.size()[-2:], mode='bilinear')
            cx2_sup = torch.nn.functional.interpolate(cx2_sup, size=input.size()[-2:], mode='bilinear')

        # output of feature fusion module
        result = self.feature_fusion_module(sx, cx)

        # upsample back by 8x (the spatial path's downsampling factor)
        result = torch.nn.functional.interpolate(result, scale_factor=8, mode='bilinear')
        result = self.conv(result)

        if self.training:
            return result, cx1_sup, cx2_sup

        return result
model/BiSeNet/build_contextpath.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torchvision import models
3
+
4
+
5
+ class resnet18(torch.nn.Module):
6
+ def __init__(self, pretrained=True):
7
+ super().__init__()
8
+ self.features = models.resnet18(pretrained=pretrained)
9
+ self.conv1 = self.features.conv1
10
+ self.bn1 = self.features.bn1
11
+ self.relu = self.features.relu
12
+ self.maxpool1 = self.features.maxpool
13
+ self.layer1 = self.features.layer1
14
+ self.layer2 = self.features.layer2
15
+ self.layer3 = self.features.layer3
16
+ self.layer4 = self.features.layer4
17
+
18
+ def forward(self, input):
19
+ x = self.conv1(input)
20
+ x = self.relu(self.bn1(x))
21
+ x = self.maxpool1(x)
22
+ feature1 = self.layer1(x) # 1 / 4
23
+ feature2 = self.layer2(feature1) # 1 / 8
24
+ feature3 = self.layer3(feature2) # 1 / 16
25
+ feature4 = self.layer4(feature3) # 1 / 32
26
+ # global average pooling to build tail
27
+ tail = torch.mean(feature4, 3, keepdim=True)
28
+ tail = torch.mean(tail, 2, keepdim=True)
29
+ return feature3, feature4, tail
30
+
31
+
32
+ class resnet101(torch.nn.Module):
33
+ def __init__(self, pretrained=True):
34
+ super().__init__()
35
+ self.features = models.resnet101(pretrained=pretrained)
36
+ self.conv1 = self.features.conv1
37
+ self.bn1 = self.features.bn1
38
+ self.relu = self.features.relu
39
+ self.maxpool1 = self.features.maxpool
40
+ self.layer1 = self.features.layer1
41
+ self.layer2 = self.features.layer2
42
+ self.layer3 = self.features.layer3
43
+ self.layer4 = self.features.layer4
44
+
45
+ def forward(self, input):
46
+ x = self.conv1(input)
47
+ x = self.relu(self.bn1(x))
48
+ x = self.maxpool1(x)
49
+ feature1 = self.layer1(x) # 1 / 4
50
+ feature2 = self.layer2(feature1) # 1 / 8
51
+ feature3 = self.layer3(feature2) # 1 / 16
52
+ feature4 = self.layer4(feature3) # 1 / 32
53
+ # global average pooling to build tail
54
+ tail = torch.mean(feature4, 3, keepdim=True)
55
+ tail = torch.mean(tail, 2, keepdim=True)
56
+ return feature3, feature4, tail
57
+
58
+
59
def build_contextpath(name, pretrained=True):
    """
    Construct the requested context-path backbone.

    Args:
        name (str): Backbone name, 'resnet18' or 'resnet101'.
        pretrained (bool): Whether to load ImageNet weights (default True,
            matching the original behavior).

    Returns:
        torch.nn.Module: The backbone wrapper.

    Raises:
        KeyError: If `name` is not a supported backbone.
    """
    # BUG FIX: the original dict instantiated BOTH backbones (downloading both
    # pretrained checkpoints) just to return one; build only the requested one.
    builders = {
        'resnet18': resnet18,
        'resnet101': resnet101,
    }
    return builders[name](pretrained=pretrained)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ torchvision
3
+ gradio
utils/imageHandling.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch, torchvision
2
+
3
+ # %% image loading
4
+ def hfImageToTensor(image, width:int=1024, height:int=512)->torch.Tensor:
5
+ """
6
+ Convert an input image (PIL.Image or numpy array) from Hugging Face to a torch tensor
7
+ of shape (3, height, width) and type float32.
8
+
9
+ Args:
10
+ image: Input image (PIL.Image or numpy array).
11
+ width (int): Target width.
12
+ height (int): Target height.
13
+
14
+ Returns:
15
+ torch.Tensor: Image tensor of shape (3, height, width).
16
+ """
17
+ image = image if isinstance(image, torch.Tensor) else torchvision.transforms.functional.to_tensor(image)
18
+ return torchvision.transforms.functional.resize(image, [height, width])
19
+
20
+ # %% preprocessing
21
+ def preprocessing(image_tensor: torch.Tensor) -> torch.Tensor:
22
+ """
23
+ Standardize the image tensor and add batch dimension.
24
+
25
+ Args:
26
+ image_tensor (torch.Tensor): Image tensor of shape (3, H, W).
27
+
28
+ Returns:
29
+ torch.Tensor: Preprocessed tensor of shape (1, 3, H, W).
30
+ """
31
+ return torchvision.transforms.functional.normalize(
32
+ image_tensor, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
33
+ ).unsqueeze(0)
utils2.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
def print_mask(mask: torch.Tensor, numClasses: int = 19) -> torch.Tensor:
    """
    Convert a class-index segmentation mask into an RGB color image.

    Uses the Cityscapes train-id palette; pixels with the ignore value 255
    (and any class id >= numClasses) stay black.

    Args:
        mask (torch.Tensor): Segmentation mask of shape (H, W) holding class indices.
        numClasses (int, optional): Number of classes to colorize. Defaults to 19.

    Returns:
        torch.Tensor: uint8 color image of shape (3, H, W).
    """
    # BUG FIX: the original annotated the return type as None although it
    # returns the colorized tensor; the explicit `mask == 255` zeroing was
    # also redundant because the canvas is initialized to black.
    colors = [
        (128, 64, 128),   # 0: road
        (244, 35, 232),   # 1: sidewalk
        (70, 70, 70),     # 2: building
        (102, 102, 156),  # 3: wall
        (190, 153, 153),  # 4: fence
        (153, 153, 153),  # 5: pole
        (250, 170, 30),   # 6: traffic light
        (220, 220, 0),    # 7: traffic sign
        (107, 142, 35),   # 8: vegetation
        (152, 251, 152),  # 9: terrain
        (70, 130, 180),   # 10: sky
        (220, 20, 60),    # 11: person
        (255, 0, 0),      # 12: rider
        (0, 0, 142),      # 13: car
        (0, 0, 70),       # 14: truck
        (0, 60, 100),     # 15: bus
        (0, 80, 100),     # 16: train
        (0, 0, 230),      # 17: motorcycle
        (119, 11, 32)     # 18: bicycle
    ]

    # (H, W, 3) canvas initialized to black, which also covers ignore=255.
    colorized = torch.zeros((mask.shape[0], mask.shape[1], 3), dtype=torch.uint8)
    for class_id in range(numClasses):
        colorized[mask == class_id] = torch.tensor(colors[class_id], dtype=torch.uint8)
    # Return channels-first (3, H, W) to match torch image conventions.
    return colorized.permute(2, 0, 1)
weights/BiSeNet/weightADV.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:880db4160f20c87aecc13845ad691b1963fbce3d713b1dda1964457b9e0d8f0a
3
+ size 121015606