junma committed on
Commit
56afa1a
1 Parent(s): f82a26e
app.py ADDED
@@ -0,0 +1,105 @@
+ #!/usr/bin/env python
+ # coding=utf-8
+ # Author: Jun Ma
+
+ import os
+ join = os.path.join
+ import argparse
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ import tifffile as tif
+ import monai
+ from tqdm import tqdm
+ from utils.postprocess import mask_overlay
+ from monai.transforms import Activations, AddChanneld, AsChannelFirstd, AsDiscrete, Compose, EnsureTyped, EnsureType
+ from models.unicell_modules import MiT_B2_UNet_MultiHead, MiT_B3_UNet_MultiHead
+ import matplotlib.pyplot as plt
+ from skimage import io, exposure, segmentation, morphology
+ from utils.postprocess import watershed_post
+ from utils.multi_task_sliding_window_inference import multi_task_sliding_window_inference
+ import gradio as gr
+
+ def normalize_channel(img, lower=0.1, upper=99.9):
+     non_zero_vals = img[np.nonzero(img)]
+     percentiles = np.percentile(non_zero_vals, [lower, upper])
+     if percentiles[1] - percentiles[0] > 0.001:
+         img_norm = exposure.rescale_intensity(img, in_range=(percentiles[0], percentiles[1]), out_range='uint8')
+     else:
+         img_norm = img
+     return img_norm
+
+ def preprocess(img_data):
+     if len(img_data.shape) == 2:
+         img_data = np.repeat(np.expand_dims(img_data, axis=-1), 3, axis=-1)
+     elif len(img_data.shape) == 3 and img_data.shape[-1] > 3:
+         img_data = img_data[:,:, :3]
+     else:
+         pass
+     pre_img_data = np.zeros(img_data.shape, dtype=np.uint8)
+     for i in range(3):
+         img_channel_i = img_data[:,:,i]
+         if len(img_channel_i[np.nonzero(img_channel_i)])>0:
+             pre_img_data[:,:,i] = normalize_channel(img_channel_i, lower=1, upper=99)
+     return pre_img_data
+
+
+ def inference(pre_img_data):
+     test_npy = pre_img_data/np.max(pre_img_data)
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model = MiT_B2_UNet_MultiHead(in_channels=3, out_channels=3, regress_class=1, img_size=256).to(device)
+     checkpoint = torch.load('./model.pth', map_location=torch.device(device))
+     model.load_state_dict(checkpoint['model_state_dict'])
+     model.eval()
+     with torch.no_grad():
+         test_tensor = torch.from_numpy(np.expand_dims(test_npy, 0)).permute(0,3,1,2).type(torch.FloatTensor).to(device)
+
+         val_pred, val_pred_dist = multi_task_sliding_window_inference(inputs=test_tensor, roi_size=(256, 256), sw_batch_size=8, predictor=model)
+
+         # watershed postprocessing
+         val_seg_inst = watershed_post(val_pred_dist.squeeze(1).cpu().numpy(), val_pred.squeeze(1).cpu().numpy()[:,1])
+         test_pred_mask = val_seg_inst.squeeze().astype(np.uint16)
+
+         # overlay
+         boundary = segmentation.find_boundaries(test_pred_mask, connectivity=1, mode='inner')
+         boundary = morphology.binary_dilation(boundary, morphology.disk(1))
+         pre_img_data[boundary, 0] = 0
+         pre_img_data[boundary, 1] = 255
+         pre_img_data[boundary, 2] = 0
+
+     return test_pred_mask, pre_img_data
+
+
+ def predict(img):
+     print('##########', img.name)
+     img_name = img.name
+     if img_name.endswith('.tif') or img_name.endswith('.tiff'):
+         img_data = tif.imread(img_name)
+     else:
+         img_data = io.imread(img_name)
+     if len(img_data.shape)==2:
+         pre_img_data = normalize_channel(img_data, lower=0.1, upper=99.9)
+         pre_img_data = np.repeat(np.expand_dims(pre_img_data, -1), repeats=3, axis=-1)
+     else:
+         pre_img_data = np.zeros((img_data.shape[0], img_data.shape[1], 3), dtype=np.uint8)
+         for i in range(3):
+             img_channel_i = img_data[:,:,i]
+             if len(img_channel_i[np.nonzero(img_channel_i)])>0:
+                 pre_img_data[:,:,i] = normalize_channel(img_channel_i, lower=0.1, upper=99.9)
+
+     seg_labels, seg_overlay = inference(pre_img_data)
+
+     tif.imwrite(join(os.getcwd(), 'segmentation.tiff'), seg_labels, compression='zlib')
+
+     return seg_overlay, join(os.getcwd(), 'segmentation.tiff')
+
+ unicell_api = gr.Interface(
+     predict,
+     inputs = gr.File(label="Input image (png, bmp, jpg, tif, tiff)"),
+     outputs = [gr.Image(label="Segmentation overlay"), gr.File(label="Download segmentation")],
+     title = "UniCell Online Demo",
+     examples=['demo.png', 'demo.tif']
+ )
+
+ unicell_api.launch()
+
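For reference, the per-channel contrast normalization that app.py applies before inference can be exercised on its own. The sketch below is illustrative only: it mirrors normalize_channel on a synthetic 16-bit channel rather than importing app.py (which would also build the Gradio interface and call launch()), and the array sizes are arbitrary.

# minimal sketch of the percentile-based normalization used in app.py (illustrative, not the app itself)
import numpy as np
from skimage import exposure

def normalize_channel_sketch(img, lower=0.1, upper=99.9):
    non_zero_vals = img[np.nonzero(img)]
    p_low, p_high = np.percentile(non_zero_vals, [lower, upper])
    if p_high - p_low > 0.001:
        # stretch the robust intensity range of the channel to the full uint8 range
        return exposure.rescale_intensity(img, in_range=(p_low, p_high), out_range='uint8')
    return img

rng = np.random.default_rng(0)
fake_channel = rng.integers(0, 4000, size=(256, 256)).astype(np.uint16)  # synthetic 16-bit channel
norm = normalize_channel_sketch(fake_channel)
print(norm.dtype, norm.min(), norm.max())  # uint8, spanning roughly 0..255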
demo.png ADDED
demo.tif ADDED
model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a6849ec1969d4abc37b8eb915d03f7b6d6eb3092fc3f1ac5060d1310ddf89f9
+ size 90440917
models/__init__.py ADDED
@@ -0,0 +1,9 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ """
+ Created on Sun Mar 20 14:23:55 2022
+
+ @author: jma
+ """
+
+ from .unicell_modules import *
models/unicell_modules.py ADDED
@@ -0,0 +1,912 @@
1
+ # ---------------------------------------------------------------
2
+ # Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
3
+ #
4
+ # This work is licensed under the NVIDIA Source Code License
5
+ # ---------------------------------------------------------------
6
+ import torch
7
+ import torch.nn as nn
8
+ from functools import partial
9
+ import math
10
+ from itertools import repeat
11
+ import collections.abc
12
+ from typing import Tuple, Union
13
+ from monai.networks.blocks import PatchEmbed, UnetOutBlock, UnetrBasicBlock, UnetrUpBlock, UnetrPrUpBlock
14
+ from monai.networks.blocks.dynunet_block import get_conv_layer
15
+
16
+ # From PyTorch internals
17
+ def _ntuple(n):
18
+ def parse(x):
19
+ if isinstance(x, collections.abc.Iterable):
20
+ return x
21
+ return tuple(repeat(x, n))
22
+ return parse
23
+
24
+ to_1tuple = _ntuple(1)
25
+ to_2tuple = _ntuple(2)
26
+ to_3tuple = _ntuple(3)
27
+ to_4tuple = _ntuple(4)
28
+ to_ntuple = _ntuple
29
+
30
+ def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
31
+ # Cut & paste from PyTorch official master until it's in a few official releases - RW
32
+ # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
33
+ def norm_cdf(x):
34
+ # Computes standard normal cumulative distribution function
35
+ return (1. + math.erf(x / math.sqrt(2.))) / 2.
36
+
37
+ if (mean < a - 2 * std) or (mean > b + 2 * std):
+ print("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
+ "The distribution of values may be incorrect.")
41
+
42
+ with torch.no_grad():
43
+ # Values are generated by using a truncated uniform distribution and
44
+ # then using the inverse CDF for the normal distribution.
45
+ # Get upper and lower cdf values
46
+ l = norm_cdf((a - mean) / std)
47
+ u = norm_cdf((b - mean) / std)
48
+
49
+ # Uniformly fill tensor with values from [l, u], then translate to
50
+ # [2l-1, 2u-1].
51
+ tensor.uniform_(2 * l - 1, 2 * u - 1)
52
+
53
+ # Use inverse cdf transform for normal distribution to get truncated
54
+ # standard normal
55
+ tensor.erfinv_()
56
+
57
+ # Transform to proper mean, std
58
+ tensor.mul_(std * math.sqrt(2.))
59
+ tensor.add_(mean)
60
+
61
+ # Clamp to ensure it's in the proper range
62
+ tensor.clamp_(min=a, max=b)
63
+ return tensor
64
+
65
+ #%%
66
+ class Mlp(nn.Module):
67
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
68
+ super().__init__()
69
+ out_features = out_features or in_features
70
+ hidden_features = hidden_features or in_features
71
+ self.fc1 = nn.Linear(in_features, hidden_features)
72
+ self.dwconv = DWConv(hidden_features)
73
+ self.act = act_layer()
74
+ self.fc2 = nn.Linear(hidden_features, out_features)
75
+ self.drop = nn.Dropout(drop)
76
+
77
+ self.apply(self._init_weights)
78
+
79
+ def _init_weights(self, m):
80
+ if isinstance(m, nn.Linear):
81
+ trunc_normal_(m.weight, std=.02)
82
+ if isinstance(m, nn.Linear) and m.bias is not None:
83
+ nn.init.constant_(m.bias, 0)
84
+ elif isinstance(m, nn.LayerNorm):
85
+ nn.init.constant_(m.bias, 0)
86
+ nn.init.constant_(m.weight, 1.0)
87
+ elif isinstance(m, nn.Conv2d):
88
+ fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
89
+ fan_out //= m.groups
90
+ m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
91
+ if m.bias is not None:
92
+ m.bias.data.zero_()
93
+
94
+ def forward(self, x, H, W):
95
+ x = self.fc1(x)
96
+ x = self.dwconv(x, H, W)
97
+ x = self.act(x)
98
+ x = self.drop(x)
99
+ x = self.fc2(x)
100
+ x = self.drop(x)
101
+ return x
102
+
103
+
104
+ class Attention(nn.Module):
105
+ def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1):
106
+ super().__init__()
107
+ assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."
108
+
109
+ self.dim = dim
110
+ self.num_heads = num_heads
111
+ head_dim = dim // num_heads
112
+ self.scale = qk_scale or head_dim ** -0.5
113
+
114
+ self.q = nn.Linear(dim, dim, bias=qkv_bias)
115
+ self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
116
+ self.attn_drop = nn.Dropout(attn_drop)
117
+ self.proj = nn.Linear(dim, dim)
118
+ self.proj_drop = nn.Dropout(proj_drop)
119
+
120
+ self.sr_ratio = sr_ratio
121
+ if sr_ratio > 1:
122
+ self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
123
+ self.norm = nn.LayerNorm(dim)
124
+
125
+ self.apply(self._init_weights)
126
+
127
+ def _init_weights(self, m):
128
+ if isinstance(m, nn.Linear):
129
+ trunc_normal_(m.weight, std=.02)
130
+ if isinstance(m, nn.Linear) and m.bias is not None:
131
+ nn.init.constant_(m.bias, 0)
132
+ elif isinstance(m, nn.LayerNorm):
133
+ nn.init.constant_(m.bias, 0)
134
+ nn.init.constant_(m.weight, 1.0)
135
+ elif isinstance(m, nn.Conv2d):
136
+ fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
137
+ fan_out //= m.groups
138
+ m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
139
+ if m.bias is not None:
140
+ m.bias.data.zero_()
141
+
142
+ def forward(self, x, H, W):
143
+ B, N, C = x.shape
144
+ q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
145
+
146
+ if self.sr_ratio > 1:
147
+ x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
148
+ x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
149
+ x_ = self.norm(x_)
150
+ kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
151
+ else:
152
+ kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
153
+ k, v = kv[0], kv[1]
154
+
155
+ attn = (q @ k.transpose(-2, -1)) * self.scale
156
+ attn = attn.softmax(dim=-1)
157
+ attn = self.attn_drop(attn)
158
+
159
+ x = (attn @ v).transpose(1, 2).reshape(B, N, C)
160
+ x = self.proj(x)
161
+ x = self.proj_drop(x)
162
+
163
+ return x
164
+
165
+
166
+ class Block(nn.Module):
167
+
168
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
169
+ drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1):
170
+ super().__init__()
171
+ self.norm1 = norm_layer(dim)
172
+ self.attn = Attention(
173
+ dim,
174
+ num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
175
+ attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio)
176
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
177
+ # self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
178
+ self.drop_path = nn.Identity()
179
+ self.norm2 = norm_layer(dim)
180
+ mlp_hidden_dim = int(dim * mlp_ratio)
181
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
182
+
183
+ self.apply(self._init_weights)
184
+
185
+ def _init_weights(self, m):
186
+ if isinstance(m, nn.Linear):
187
+ trunc_normal_(m.weight, std=.02)
188
+ if isinstance(m, nn.Linear) and m.bias is not None:
189
+ nn.init.constant_(m.bias, 0)
190
+ elif isinstance(m, nn.LayerNorm):
191
+ nn.init.constant_(m.bias, 0)
192
+ nn.init.constant_(m.weight, 1.0)
193
+ elif isinstance(m, nn.Conv2d):
194
+ fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
195
+ fan_out //= m.groups
196
+ m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
197
+ if m.bias is not None:
198
+ m.bias.data.zero_()
199
+
200
+ def forward(self, x, H, W):
201
+ x = x + self.drop_path(self.attn(self.norm1(x), H, W))
202
+ x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
203
+
204
+ return x
205
+ #%%
206
+
207
+ class OverlapPatchEmbed(nn.Module):
208
+ """ Image to Patch Embedding
209
+ """
210
+
211
+ def __init__(self, img_size=224, patch_size=7, stride=4, in_chans=3, embed_dim=768):
212
+ super().__init__()
213
+ img_size = to_2tuple(img_size)
214
+ patch_size = to_2tuple(patch_size)
215
+
216
+ self.img_size = img_size
217
+ self.patch_size = patch_size
218
+ self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1]
219
+ self.num_patches = self.H * self.W
220
+ self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=stride,
221
+ padding=(patch_size[0] // 2, patch_size[1] // 2))
222
+ self.norm = nn.LayerNorm(embed_dim)
223
+
224
+ self.apply(self._init_weights)
225
+
226
+ def _init_weights(self, m):
227
+ if isinstance(m, nn.Linear):
228
+ trunc_normal_(m.weight, std=.02)
229
+ if isinstance(m, nn.Linear) and m.bias is not None:
230
+ nn.init.constant_(m.bias, 0)
231
+ elif isinstance(m, nn.LayerNorm):
232
+ nn.init.constant_(m.bias, 0)
233
+ nn.init.constant_(m.weight, 1.0)
234
+ elif isinstance(m, nn.Conv2d):
235
+ fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
236
+ fan_out //= m.groups
237
+ m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
238
+ if m.bias is not None:
239
+ m.bias.data.zero_()
240
+
241
+ def forward(self, x):
242
+ x = self.proj(x) # [2, 3, 224, 224]-> [2, 64, 56, 56]
243
+ # print(f"{x.shape=}")
244
+ _, _, H, W = x.shape
245
+ x = x.flatten(2).transpose(1, 2) # [2, 64, 56, 56]-> [2, 3136, 64]
246
+ # print(f"{x.shape=}")
247
+ x = self.norm(x) # [2, 3136, 64]-> [2, 3136, 64]
248
+ # print(f"{x.shape=}")
249
+
250
+ return x, H, W
251
+
252
+ # embed_dims=[64, 128, 256, 512]
253
+ # patch_embed1 = OverlapPatchEmbed(img_size=224,patch_size=7,stride=4,in_chans=in_chans, embed_dim=64)
254
+ # x1, H, W = patch_embed1(input_img)
255
+ # x1 = x1.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
256
+ # patch_embed2 = OverlapPatchEmbed(img_size=img_size // 4, patch_size=3, stride=2, in_chans=embed_dims[0],
257
+ # embed_dim=embed_dims[1])
258
+ # x2, H, W = patch_embed2(x1)
259
+ # x2 = x2.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
260
+
261
+ # patch_embed3 = OverlapPatchEmbed(img_size=img_size // 8, patch_size=3, stride=2, in_chans=embed_dims[1],
262
+ # embed_dim=embed_dims[2])
263
+ # x3, H, W = patch_embed3(x2)
264
+ # x3 = x3.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
265
+
266
+ # patch_embed4 = OverlapPatchEmbed(img_size=img_size // 16, patch_size=3, stride=2, in_chans=embed_dims[2],embed_dim=embed_dims[3])
267
+ # x4, H, W = patch_embed4(x3)
268
+ # x4 = x4.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
269
+ #%%
270
+
271
+ class MixVisionTransformer(nn.Module):
272
+ def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dims=[64, 128, 256, 512],
273
+ num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
274
+ attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm,
275
+ depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1]):
276
+ super().__init__()
277
+ # self.num_classes = num_classes
278
+ self.depths = depths
279
+
280
+ # patch_embed
281
+ self.patch_embed1 = OverlapPatchEmbed(img_size=img_size, patch_size=7, stride=4, in_chans=in_chans,
282
+ embed_dim=embed_dims[0])
283
+ self.patch_embed2 = OverlapPatchEmbed(img_size=img_size // 4, patch_size=3, stride=2, in_chans=embed_dims[0],
284
+ embed_dim=embed_dims[1])
285
+ self.patch_embed3 = OverlapPatchEmbed(img_size=img_size // 8, patch_size=3, stride=2, in_chans=embed_dims[1],
286
+ embed_dim=embed_dims[2])
287
+ self.patch_embed4 = OverlapPatchEmbed(img_size=img_size // 16, patch_size=3, stride=2, in_chans=embed_dims[2],
288
+ embed_dim=embed_dims[3])
289
+
290
+ # transformer encoder
291
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule
292
+ cur = 0
293
+ self.block1 = nn.ModuleList([Block(
294
+ dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias, qk_scale=qk_scale,
295
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
296
+ sr_ratio=sr_ratios[0])
297
+ for i in range(depths[0])])
298
+ self.norm1 = norm_layer(embed_dims[0])
299
+
300
+ cur += depths[0]
301
+ self.block2 = nn.ModuleList([Block(
302
+ dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias, qk_scale=qk_scale,
303
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
304
+ sr_ratio=sr_ratios[1])
305
+ for i in range(depths[1])])
306
+ self.norm2 = norm_layer(embed_dims[1])
307
+
308
+ cur += depths[1]
309
+ self.block3 = nn.ModuleList([Block(
310
+ dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias, qk_scale=qk_scale,
311
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
312
+ sr_ratio=sr_ratios[2])
313
+ for i in range(depths[2])])
314
+ self.norm3 = norm_layer(embed_dims[2])
315
+
316
+ cur += depths[2]
317
+ self.block4 = nn.ModuleList([Block(
318
+ dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias, qk_scale=qk_scale,
319
+ drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
320
+ sr_ratio=sr_ratios[3])
321
+ for i in range(depths[3])])
322
+ self.norm4 = norm_layer(embed_dims[3])
323
+
324
+ # classification head
325
+ # self.head = nn.Linear(embed_dims[3], num_classes) if num_classes > 0 else nn.Identity()
326
+
327
+ self.apply(self._init_weights)
328
+
329
+ def _init_weights(self, m):
330
+ if isinstance(m, nn.Linear):
331
+ trunc_normal_(m.weight, std=.02)
332
+ if isinstance(m, nn.Linear) and m.bias is not None:
333
+ nn.init.constant_(m.bias, 0)
334
+ elif isinstance(m, nn.LayerNorm):
335
+ nn.init.constant_(m.bias, 0)
336
+ nn.init.constant_(m.weight, 1.0)
337
+ elif isinstance(m, nn.Conv2d):
338
+ fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
339
+ fan_out //= m.groups
340
+ m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
341
+ if m.bias is not None:
342
+ m.bias.data.zero_()
343
+
344
+ def init_weights(self, pretrained=None):
345
+ if isinstance(pretrained, str):
346
+ # logger = get_root_logger()
347
+ # load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger)
348
+ # load_checkpoint(self, pretrained, map_location='cpu', strict=False)
349
+ torch.load(pretrained, map_location='cpu')
350
+
351
+ def reset_drop_path(self, drop_path_rate):
352
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths))]
353
+ cur = 0
354
+ for i in range(self.depths[0]):
355
+ self.block1[i].drop_path.drop_prob = dpr[cur + i]
356
+
357
+ cur += self.depths[0]
358
+ for i in range(self.depths[1]):
359
+ self.block2[i].drop_path.drop_prob = dpr[cur + i]
360
+
361
+ cur += self.depths[1]
362
+ for i in range(self.depths[2]):
363
+ self.block3[i].drop_path.drop_prob = dpr[cur + i]
364
+
365
+ cur += self.depths[2]
366
+ for i in range(self.depths[3]):
367
+ self.block4[i].drop_path.drop_prob = dpr[cur + i]
368
+
369
+ def freeze_patch_emb(self):
370
+ self.patch_embed1.requires_grad = False
371
+
372
+ @torch.jit.ignore
373
+ def no_weight_decay(self):
374
+ return {'pos_embed1', 'pos_embed2', 'pos_embed3', 'pos_embed4', 'cls_token'} # has pos_embed may be better
375
+
376
+ def get_classifier(self):
377
+ return self.head
378
+
379
+ # def reset_classifier(self, num_classes, global_pool=''):
380
+ # self.num_classes = num_classes
381
+ # self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
382
+
383
+ def forward_features(self, x):
384
+ B = x.shape[0]
385
+ outs = []
386
+
387
+ # stage 1
388
+ x, H, W = self.patch_embed1(x)
389
+ for i, blk in enumerate(self.block1):
390
+ x = blk(x, H, W)
391
+ x = self.norm1(x)
392
+ x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
393
+ outs.append(x)
394
+
395
+ # stage 2
396
+ x, H, W = self.patch_embed2(x)
397
+ for i, blk in enumerate(self.block2):
398
+ x = blk(x, H, W)
399
+ x = self.norm2(x)
400
+ x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
401
+ outs.append(x)
402
+
403
+ # stage 3
404
+ x, H, W = self.patch_embed3(x)
405
+ for i, blk in enumerate(self.block3):
406
+ x = blk(x, H, W)
407
+ x = self.norm3(x)
408
+ x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
409
+ outs.append(x)
410
+
411
+ # stage 4
412
+ x, H, W = self.patch_embed4(x)
413
+ for i, blk in enumerate(self.block4):
414
+ x = blk(x, H, W)
415
+ x = self.norm4(x)
416
+ x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
417
+ outs.append(x)
418
+
419
+ return outs
420
+
421
+ def forward(self, x):
422
+ x = self.forward_features(x)
423
+ # x = self.head(x)
424
+
425
+ return x
426
+
427
+
428
+ class DWConv(nn.Module):
429
+ def __init__(self, dim=768):
430
+ super(DWConv, self).__init__()
431
+ self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)
432
+
433
+ def forward(self, x, H, W):
434
+ B, N, C = x.shape
435
+ x = x.transpose(1, 2).view(B, C, H, W)
436
+ x = self.dwconv(x)
437
+ x = x.flatten(2).transpose(1, 2)
438
+ return x
439
+
440
+
441
+
442
+
443
+ class mit_b0(MixVisionTransformer):
444
+ def __init__(self, **kwargs):
445
+ super(mit_b0, self).__init__(
446
+ patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
447
+ qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
448
+ drop_rate=0.0, drop_path_rate=0.1)
449
+
450
+
451
+
452
+ class mit_b1(MixVisionTransformer):
453
+ def __init__(self, **kwargs):
454
+ super(mit_b1, self).__init__(
455
+ patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
456
+ qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
457
+ drop_rate=0.0, drop_path_rate=0.1)
458
+
459
+
460
+ class mit_b2(MixVisionTransformer):
461
+ def __init__(self, **kwargs):
462
+ super(mit_b2, self).__init__(
463
+ patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
464
+ qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1],
465
+ drop_rate=0.0, drop_path_rate=0.1)
466
+
467
+
468
+
469
+ class mit_b3(MixVisionTransformer):
470
+ def __init__(self, **kwargs):
471
+ super(mit_b3, self).__init__(
472
+ patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
473
+ qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1],
474
+ drop_rate=0.0, drop_path_rate=0.1)
475
+
476
+
477
+
478
+ class mit_b4(MixVisionTransformer):
479
+ def __init__(self, **kwargs):
480
+ super(mit_b4, self).__init__(
481
+ patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
482
+ qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1],
483
+ drop_rate=0.0, drop_path_rate=0.1)
484
+
485
+
486
+
487
+ class mit_b5(MixVisionTransformer):
488
+ def __init__(self, **kwargs):
489
+ super(mit_b5, self).__init__(
490
+ patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
491
+ qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1],
492
+ drop_rate=0.0, drop_path_rate=0.1)
493
+
494
+
495
+ #%% B2
496
+ class MiT_B2_UNet_MultiHead(nn.Module):
497
+ def __init__(self,
498
+ in_channels: int,
499
+ out_channels: int,
500
+ regress_class: int = 1,
501
+ img_size: Tuple[int, int] = (256,256),
502
+
503
+ feature_size: int = 16,
504
+ spatial_dims: int = 2,
505
+ # hidden_size: int = 768,
506
+ # mlp_dim: int = 3072,
507
+ num_heads = [1, 2, 4, 8],
508
+ # pos_embed: str = "perceptron",
509
+ norm_name: Union[Tuple, str] = "instance",
510
+ conv_block: bool = False,
511
+ res_block: bool = True,
512
+ dropout_rate: float = 0.0,
513
+ debug: bool = False
514
+ ):
515
+ super().__init__()
516
+ self.debug = debug
517
+ self.mit_b3 = MixVisionTransformer(img_size=img_size, patch_size=4, embed_dims=[feature_size*2, feature_size*4, feature_size*8, feature_size*16],
518
+ num_heads=num_heads, mlp_ratios=[4, 4, 4, 4], qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6),
519
+ depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], drop_rate=0.0, drop_path_rate=0.1)
520
+
521
+ self.encoder1 = UnetrBasicBlock(
522
+ spatial_dims=spatial_dims,
523
+ in_channels=in_channels,
524
+ out_channels=feature_size,
525
+ kernel_size=3,
526
+ stride=1,
527
+ norm_name=norm_name,
528
+ res_block=True,
529
+ )
530
+
531
+ self.encoder2 = UnetrBasicBlock(
532
+ spatial_dims=spatial_dims,
533
+ in_channels=2 * feature_size,
534
+ out_channels=2 * feature_size,
535
+ kernel_size=3,
536
+ stride=1,
537
+ norm_name=norm_name,
538
+ res_block=True,
539
+ )
540
+
541
+ self.encoder3 = UnetrBasicBlock(
542
+ spatial_dims=spatial_dims,
543
+ in_channels=4 * feature_size,
544
+ out_channels=4 * feature_size,
545
+ kernel_size=3,
546
+ stride=1,
547
+ norm_name=norm_name,
548
+ res_block=True,
549
+ )
550
+
551
+ self.encoder4 = UnetrBasicBlock(
552
+ spatial_dims=spatial_dims,
553
+ in_channels=8 * feature_size,
554
+ out_channels=8 * feature_size,
555
+ kernel_size=3,
556
+ stride=1,
557
+ norm_name=norm_name,
558
+ res_block=True,
559
+ )
560
+
561
+ self.encoder5 = UnetrBasicBlock(
562
+ spatial_dims=spatial_dims,
563
+ in_channels=16 * feature_size,
564
+ out_channels=16 * feature_size,
565
+ kernel_size=3,
566
+ stride=1,
567
+ norm_name=norm_name,
568
+ res_block=True,
569
+ )
570
+
571
+ self.decoder4 = UnetrUpBlock(
572
+ spatial_dims=2,
573
+ in_channels=feature_size * 16,
574
+ out_channels=feature_size * 8,
575
+ kernel_size=3,
576
+ upsample_kernel_size=2,
577
+ norm_name=norm_name,
578
+ res_block=res_block,
579
+ )
580
+ self.decoder3 = UnetrUpBlock(
581
+ spatial_dims=2,
582
+ in_channels=feature_size * 8,
583
+ out_channels=feature_size * 4,
584
+ kernel_size=3,
585
+ upsample_kernel_size=2,
586
+ norm_name=norm_name,
587
+ res_block=res_block,
588
+ )
589
+ self.decoder2 = UnetrUpBlock(
590
+ spatial_dims=2,
591
+ in_channels=feature_size * 4,
592
+ out_channels=feature_size * 2,
593
+ kernel_size=3,
594
+ upsample_kernel_size=2,
595
+ norm_name=norm_name,
596
+ res_block=res_block,
597
+ )
598
+
599
+ self.transp_conv = get_conv_layer(
600
+ spatial_dims=2,
601
+ in_channels=feature_size*2,
602
+ out_channels=feature_size*2,
603
+ kernel_size=3,
604
+ stride=2,
605
+ conv_only=True,
606
+ is_transposed=True,
607
+ )
608
+ self.decoder1 = UnetrUpBlock(
609
+ spatial_dims=2,
610
+ in_channels=feature_size * 2,
611
+ out_channels=feature_size,
612
+ kernel_size=3,
613
+ upsample_kernel_size=2,
614
+ norm_name=norm_name,
615
+ res_block=res_block,
616
+ )
617
+
618
+ self.out_interior = UnetOutBlock(spatial_dims=2, in_channels=feature_size, out_channels=out_channels) # type: ignore
619
+ self.out_dist = UnetOutBlock(spatial_dims=2, in_channels=feature_size, out_channels=1) # type: ignore
620
+
621
+ def forward(self, x_in):
622
+ hidden_states_out = self.mit_b3(x_in) # x: (B, 256,768), hidden_states_out: list, 12 elements, (B,256,768)
623
+ enc1 = self.encoder1(x_in) # (B, 16, 256, 256)
624
+ x1 = hidden_states_out[0] # (B, 32, 64, 64)
625
+ enc2 = self.encoder2(x1) # (B, 64, 32, 32)
626
+ x2 = hidden_states_out[1] # (B, 64, 32, 32)
627
+ enc3 = self.encoder3(x2) # (B, 128, 16, 16)
628
+ x3 = hidden_states_out[2] # (B, 128, 16,16)
629
+ enc4 = self.encoder4(x3) # (B, 256, 8, 8)
630
+ x4 = hidden_states_out[3] # (B, 256, 8, 8)
631
+ enc5 = self.encoder5(x4) # (B, 256, 8, 8)
632
+ # print(f"{enc1.shape=}, {enc2.shape=}, {enc3.shape=}, {enc4.shape=}, {enc5.shape=}")
633
+
634
+ dec4 = self.decoder4(enc5, enc4) # (B, 128, 16, 16); up -> cat -> ResConv; (B, 128, 16, 16)
635
+ dec3 = self.decoder3(dec4, enc3) # (B, 64, 32, 32)
636
+ dec2 = self.decoder2(dec3, enc2) # (B, 32, 64, 64)
637
+ dec2_up = self.transp_conv(dec2) # [B, 32, 128, 128]
638
+ dec1 = self.decoder1(dec2_up, enc1) # (B, 16, 256, 256)
639
+ logits = self.out_interior(dec1)
640
+ dist = self.out_dist(dec1)
641
+
642
+ if self.debug:
643
+ return hidden_states_out, enc1, enc2, enc3, enc4, dec4, dec3, dec2, dec1, logits
644
+ else:
645
+ return logits, dist
646
+
647
+ # print(f"{dec1.shape=}, {dec2.shape=}, {dec3.shape=}, {dec4.shape=}, {logits.shape=}")
648
+
649
+ img_size = 256
650
+ in_chans = 3
651
+ B = 2
652
+ input_img = torch.randn((B,in_chans,img_size,img_size))
653
+
654
+ b2 = MiT_B2_UNet_MultiHead(3, 3, img_size=img_size)
655
+ logits, dist = b2(input_img)
656
+
657
+
658
+ #%% B3
659
+ class MiT_B3_UNet_MultiHead(nn.Module):
660
+ def __init__(self,
661
+ in_channels: int,
662
+ out_channels: int,
663
+ regress_class: int = 1,
664
+ img_size: Tuple[int, int] = (256,256),
665
+
666
+ feature_size: int = 16,
667
+ spatial_dims: int = 2,
668
+ # hidden_size: int = 768,
669
+ # mlp_dim: int = 3072,
670
+ num_heads = [1, 2, 4, 8],
671
+ # pos_embed: str = "perceptron",
672
+ norm_name: Union[Tuple, str] = "instance",
673
+ conv_block: bool = False,
674
+ res_block: bool = True,
675
+ dropout_rate: float = 0.0,
676
+ debug: bool = False
677
+ ):
678
+ super().__init__()
679
+ self.debug = debug
680
+ self.mit_b3 = MixVisionTransformer(img_size=img_size, patch_size=4, embed_dims=[feature_size*2, feature_size*4, feature_size*8, feature_size*16],
681
+ num_heads=num_heads, mlp_ratios=[4, 4, 4, 4], qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1],
682
+ drop_rate=0.0, drop_path_rate=0.1)
683
+
684
+ self.encoder1 = UnetrBasicBlock(
685
+ spatial_dims=spatial_dims,
686
+ in_channels=in_channels,
687
+ out_channels=feature_size,
688
+ kernel_size=3,
689
+ stride=1,
690
+ norm_name=norm_name,
691
+ res_block=True,
692
+ )
693
+
694
+ self.encoder2 = UnetrBasicBlock(
695
+ spatial_dims=spatial_dims,
696
+ in_channels=2 * feature_size,
697
+ out_channels=2 * feature_size,
698
+ kernel_size=3,
699
+ stride=1,
700
+ norm_name=norm_name,
701
+ res_block=True,
702
+ )
703
+
704
+ self.encoder3 = UnetrBasicBlock(
705
+ spatial_dims=spatial_dims,
706
+ in_channels=4 * feature_size,
707
+ out_channels=4 * feature_size,
708
+ kernel_size=3,
709
+ stride=1,
710
+ norm_name=norm_name,
711
+ res_block=True,
712
+ )
713
+
714
+ self.encoder4 = UnetrBasicBlock(
715
+ spatial_dims=spatial_dims,
716
+ in_channels=8 * feature_size,
717
+ out_channels=8 * feature_size,
718
+ kernel_size=3,
719
+ stride=1,
720
+ norm_name=norm_name,
721
+ res_block=True,
722
+ )
723
+
724
+ self.encoder5 = UnetrBasicBlock(
725
+ spatial_dims=spatial_dims,
726
+ in_channels=16 * feature_size,
727
+ out_channels=16 * feature_size,
728
+ kernel_size=3,
729
+ stride=1,
730
+ norm_name=norm_name,
731
+ res_block=True,
732
+ )
733
+
734
+ self.decoder4 = UnetrUpBlock(
735
+ spatial_dims=2,
736
+ in_channels=feature_size * 16,
737
+ out_channels=feature_size * 8,
738
+ kernel_size=3,
739
+ upsample_kernel_size=2,
740
+ norm_name=norm_name,
741
+ res_block=res_block,
742
+ )
743
+ self.decoder3 = UnetrUpBlock(
744
+ spatial_dims=2,
745
+ in_channels=feature_size * 8,
746
+ out_channels=feature_size * 4,
747
+ kernel_size=3,
748
+ upsample_kernel_size=2,
749
+ norm_name=norm_name,
750
+ res_block=res_block,
751
+ )
752
+ self.decoder2 = UnetrUpBlock(
753
+ spatial_dims=2,
754
+ in_channels=feature_size * 4,
755
+ out_channels=feature_size * 2,
756
+ kernel_size=3,
757
+ upsample_kernel_size=2,
758
+ norm_name=norm_name,
759
+ res_block=res_block,
760
+ )
761
+
762
+ self.transp_conv = get_conv_layer(
763
+ spatial_dims=2,
764
+ in_channels=feature_size*2,
765
+ out_channels=feature_size*2,
766
+ kernel_size=3,
767
+ stride=2,
768
+ conv_only=True,
769
+ is_transposed=True,
770
+ )
771
+ self.decoder1 = UnetrUpBlock(
772
+ spatial_dims=2,
773
+ in_channels=feature_size * 2,
774
+ out_channels=feature_size,
775
+ kernel_size=3,
776
+ upsample_kernel_size=2,
777
+ norm_name=norm_name,
778
+ res_block=res_block,
779
+ )
780
+
781
+ self.out_interior = UnetOutBlock(spatial_dims=2, in_channels=feature_size, out_channels=out_channels) # type: ignore
782
+ self.out_dist = UnetOutBlock(spatial_dims=2, in_channels=feature_size, out_channels=1) # type: ignore
783
+
784
+ def forward(self, x_in):
785
+ hidden_states_out = self.mit_b3(x_in) # x: (B, 256,768), hidden_states_out: list, 12 elements, (B,256,768)
786
+ enc1 = self.encoder1(x_in) # (B, 16, 256, 256)
787
+ x1 = hidden_states_out[0] # (B, 32, 64, 64)
788
+ enc2 = self.encoder2(x1) # (B, 64, 32, 32)
789
+ x2 = hidden_states_out[1] # (B, 64, 32, 32)
790
+ enc3 = self.encoder3(x2) # (B, 128, 16, 16)
791
+ x3 = hidden_states_out[2] # (B, 128, 16,16)
792
+ enc4 = self.encoder4(x3) # (B, 256, 8, 8)
793
+ x4 = hidden_states_out[3] # (B, 256, 8, 8)
794
+ enc5 = self.encoder5(x4) # (B, 256, 8, 8)
795
+ # print(f"{enc1.shape=}, {enc2.shape=}, {enc3.shape=}, {enc4.shape=}, {enc5.shape=}")
796
+
797
+ dec4 = self.decoder4(enc5, enc4) # (B, 128, 16, 16); up -> cat -> ResConv; (B, 128, 16, 16)
798
+ dec3 = self.decoder3(dec4, enc3) # (B, 64, 32, 32)
799
+ dec2 = self.decoder2(dec3, enc2) # (B, 32, 64, 64)
800
+ dec2_up = self.transp_conv(dec2) # [B, 32, 128, 128]
801
+ dec1 = self.decoder1(dec2_up, enc1) # (B, 16, 256, 256)
802
+ logits = self.out_interior(dec1)
803
+ dist = self.out_dist(dec1)
804
+
805
+ if self.debug:
806
+ return hidden_states_out, enc1, enc2, enc3, enc4, dec4, dec3, dec2, dec1, logits
807
+ else:
808
+ return logits, dist
809
+
810
+ # print(f"{dec1.shape=}, {dec2.shape=}, {dec3.shape=}, {dec4.shape=}, {logits.shape=}")
811
+
812
+
813
+
814
+ #%% head
815
+ class MLPEmbedding(nn.Module):
816
+ """
817
+ Linear Embedding
818
+ used in head
819
+ """
820
+ def __init__(self, input_dim=2048, embed_dim=768):
821
+ super().__init__()
822
+ self.proj = nn.Linear(input_dim, embed_dim)
823
+
824
+ def forward(self, x):
825
+ x = x.flatten(2).transpose(1, 2)
826
+ x = self.proj(x)
827
+ return x
828
+
829
+ class All_MLP_Head(nn.Module):
830
+ """
831
+ All MLP head in segformer
832
+ Simple and Efficient Design for Semantic Segmentation with Transformers
833
+ """
834
+ def __init__(self, in_channels=[64,128,320,512], # channel number of multi-scale features
835
+ in_index=[0,1,2,3],
836
+ feature_strides=[4,8,16,32],
837
+ dropout_ratio=0.1,
838
+ num_classes=3,
839
+ embedding_dim=768,
840
+ output_hidden_states=False):
841
+ super().__init__()
842
+ self.in_channels = in_channels
843
+ assert len(feature_strides) == len(self.in_channels)
844
+ assert min(feature_strides) == feature_strides[0]
845
+ self.in_index = in_index
846
+ self.feature_strides = feature_strides
847
+ self.dropout_ratio = dropout_ratio
848
+ self.num_classes = num_classes
849
+ self.output_hidden_states = output_hidden_states
850
+
851
+ c1_in_channels, c2_in_channels, c3_in_channels, c4_in_channels = self.in_channels
852
+
853
+ # unify channel number to 768
854
+ self.linear_c4 = MLPEmbedding(input_dim=c4_in_channels, embed_dim=embedding_dim)
855
+ self.linear_c3 = MLPEmbedding(input_dim=c3_in_channels, embed_dim=embedding_dim)
856
+ self.linear_c2 = MLPEmbedding(input_dim=c2_in_channels, embed_dim=embedding_dim)
857
+ self.linear_c1 = MLPEmbedding(input_dim=c1_in_channels, embed_dim=embedding_dim)
858
+
859
+ self.linear_fuse = nn.Conv2d(in_channels=embedding_dim*4, out_channels=embedding_dim, kernel_size=1,bias=False)
860
+ self.batch_norm = nn.BatchNorm2d(embedding_dim) # 4: number of blocks
861
+ self.activation = nn.ReLU()
862
+ if dropout_ratio>0:
863
+ self.dropout = nn.Dropout2d(self.dropout_ratio)
864
+ self.linear_pred = nn.Conv2d(embedding_dim, self.num_classes, kernel_size=1)
865
+
866
+ def forward(self, inputs):
867
+ # x = self._transform_inputs(inputs) # len=4, 1/4,1/8,1/16,1/32
868
+ c1, c2, c3, c4 = inputs
869
+
870
+ ############## MLP decoder on C1-C4 ###########
871
+ n, _, h, w = c4.shape
872
+ # normalize channel number and resample to 1/4 HxW
873
+ _c4 = self.linear_c4(c4).permute(0,2,1).reshape(n, -1, c4.shape[2], c4.shape[3])
874
+ _c4 = nn.functional.interpolate(_c4, size=c1.size()[2:], mode='bilinear',align_corners=False)
875
+
876
+ _c3 = self.linear_c3(c3).permute(0,2,1).reshape(n, -1, c3.shape[2], c3.shape[3])
877
+ _c3 = nn.functional.interpolate(_c3, size=c1.size()[2:], mode='bilinear',align_corners=False)
878
+
879
+ _c2 = self.linear_c2(c2).permute(0,2,1).reshape(n, -1, c2.shape[2], c2.shape[3])
880
+ _c2 = nn.functional.interpolate(_c2, size=c1.size()[2:], mode='bilinear',align_corners=False)
881
+
882
+ _c1 = self.linear_c1(c1).permute(0,2,1).reshape(n, -1, c1.shape[2], c1.shape[3])
883
+
884
+ # concatenate features
885
+ hidden_states = self.linear_fuse(torch.cat([_c4, _c3, _c2, _c1], dim=1))
886
+ hidden_states = self.batch_norm(hidden_states)
887
+ hidden_states = self.activation(hidden_states)
888
+ hidden_states = self.dropout(hidden_states)
889
+ # predict results
890
+ x = self.linear_pred(hidden_states)
891
+ if self.output_hidden_states:
892
+ return x, hidden_states
893
+ else:
894
+ return x
895
+
896
+
897
+
898
+ #%% test different networks
899
+ # img_size = 256
900
+ # in_chans = 3
901
+ # B = 2
902
+ # input_img = torch.randn((B,in_chans,img_size,img_size))
903
+
904
+ # b3 = mit_b3_demo(img_size=img_size)
905
+ # b3_out = b3(input_img)
906
+ # for feature in b3_out:
907
+ # print(f"{feature.shape=}")
908
+ # head = All_MLP_Head()
909
+ # outputs = head(b3_out)
910
+ # print(f"{outputs.shape = }")
911
+
912
+
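The commented-out snippets above indicate how the networks are meant to be exercised; a minimal sketch along the same lines, assuming this repository is on the import path and running on CPU without the trained checkpoint:

# minimal sketch: the dual-head output of MiT_B2_UNet_MultiHead on a random input
import torch
from models.unicell_modules import MiT_B2_UNet_MultiHead

model = MiT_B2_UNet_MultiHead(in_channels=3, out_channels=3, regress_class=1, img_size=256)
model.eval()
with torch.no_grad():
    x = torch.randn(1, 3, 256, 256)   # N, C, H, W
    logits, dist = model(x)           # interior/boundary logits and distance-map head
print(logits.shape, dist.shape)       # expected: (1, 3, 256, 256) and (1, 1, 256, 256)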
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ numpy
+ scipy
+ numba
+ einops
+ imagecodecs
+ matplotlib
+ monai
+ pandas
+ pillow
+ scikit-image
+ torch
+ torchvision
utils/__init__.py ADDED
@@ -0,0 +1,7 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ """
+ Created on Thu Apr 7 10:53:23 2022
+
+ @author: jma
+ """
utils/multi_task_sliding_window_inference.py ADDED
@@ -0,0 +1,187 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ """
+ Created on Fri Apr 1 19:18:58 2022
+
+ @author: jma
+ """
+
+ from typing import Any, Callable, List, Sequence, Tuple, Union
+ import torch
+ import torch.nn.functional as F
+ from monai.data.utils import compute_importance_map, dense_patch_slices, get_valid_patch_size
+ from monai.utils import BlendMode, PytorchPadMode, fall_back_tuple, look_up_option
+
+
+ __all__ = ["multi_task_sliding_window_inference"]
+
+ def multi_task_sliding_window_inference(
+     inputs: torch.Tensor,
+     roi_size: Union[Sequence[int], int],
+     sw_batch_size: int,
+     predictor: Callable[..., torch.Tensor],
+     overlap = 0.25,
+     mode = "constant",
+     sigma_scale = 0.125,
+     padding_mode = "constant",
+     cval = 0.0,
+     sw_device = None,
+     device = None,
+     *args: Any,
+     **kwargs: Any,
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
+     """
+     Sliding window inference on `inputs` with `predictor`.
+
+     When roi_size is larger than the inputs' spatial size, the input image is padded during inference.
+     To maintain the same spatial sizes, the output image will be cropped to the original input size.
+
+     Args:
+         inputs: input image to be processed (assuming NCHW[D])
+         roi_size: the spatial window size for inferences.
+             When its components have None or non-positives, the corresponding inputs dimension will be used.
+             If the components of the `roi_size` are non-positive values, the transform will use the
+             corresponding components of img size. For example, `roi_size=(32, -1)` will be adapted
+             to `(32, 64)` if the second spatial dimension size of img is `64`.
+         sw_batch_size: the batch size to run window slices.
+         predictor: given input tensor `patch_data` in shape NCHW[D], `predictor(patch_data)`
+             should return a prediction with the same spatial shape and batch_size, i.e. NMHW[D];
+             where HW[D] represents the patch spatial size, M is the number of output channels, N is `sw_batch_size`.
+             In this multi-task variant the predictor must return a pair (class logits, distance map).
+         overlap: Amount of overlap between scans.
+         mode: {``"constant"``, ``"gaussian"``}
+             How to blend output of overlapping windows. Defaults to ``"constant"``.
+
+             - ``"constant"``: gives equal weight to all predictions.
+             - ``"gaussian"``: gives less weight to predictions on edges of windows.
+
+         sigma_scale: the standard deviation coefficient of the Gaussian window when `mode` is ``"gaussian"``.
+             Default: 0.125. Actual window sigma is ``sigma_scale`` * ``dim_size``.
+             When sigma_scale is a sequence of floats, the values denote sigma_scale at the corresponding
+             spatial dimensions.
+         padding_mode: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}
+             Padding mode for ``inputs``, when ``roi_size`` is larger than inputs. Defaults to ``"constant"``.
+             See also: https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
+         cval: fill value for 'constant' padding mode. Default: 0
+         sw_device: device for the window data.
+             By default the device (and accordingly the memory) of the `inputs` is used.
+             Normally `sw_device` should be consistent with the device where `predictor` is defined.
+         device: device for the stitched output prediction.
+             By default the device (and accordingly the memory) of the `inputs` is used. If for example
+             set to device=torch.device('cpu') the gpu memory consumption is less and independent of the
+             `inputs` and `roi_size`. Output is on the `device`.
+         args: optional args to be passed to ``predictor``.
+         kwargs: optional keyword args to be passed to ``predictor``.
+
+     Note:
+         - input must be channel-first and have a batch dim, supports N-D sliding window.
+
+     """
+     num_spatial_dims = len(inputs.shape) - 2
+     if overlap < 0 or overlap >= 1:
+         raise AssertionError("overlap must be >= 0 and < 1.")
+
+     # determine image spatial size and batch size
+     # Note: all input images must have the same image size and batch size
+     image_size_ = list(inputs.shape[2:])
+     batch_size = inputs.shape[0]
+
+     if device is None:
+         device = inputs.device
+     if sw_device is None:
+         sw_device = inputs.device
+
+     roi_size = fall_back_tuple(roi_size, image_size_)
+     # in case that image size is smaller than roi size
+     image_size = tuple(max(image_size_[i], roi_size[i]) for i in range(num_spatial_dims))
+     pad_size = []
+     for k in range(len(inputs.shape) - 1, 1, -1):
+         diff = max(roi_size[k - 2] - inputs.shape[k], 0)
+         half = diff // 2
+         pad_size.extend([half, diff - half])
+     inputs = F.pad(inputs, pad=pad_size, mode=mode, value=cval)
+
+     scan_interval = _get_scan_interval(image_size, roi_size, num_spatial_dims, overlap)
+
+     # Store all slices in list
+     slices = dense_patch_slices(image_size, roi_size, scan_interval)
+     num_win = len(slices)  # number of windows per image
+     total_slices = num_win * batch_size  # total number of windows
+
+     # Create window-level importance map
+     importance_map = compute_importance_map(
+         get_valid_patch_size(image_size, roi_size), mode="gaussian", sigma_scale=sigma_scale, device=device
+     )
+
+     # Perform predictions
+     output_image, count_map = torch.tensor(0.0, device=device), torch.tensor(0.0, device=device)
+     output_dist = torch.tensor(0.0, device=device)
+     _initialized = False
+     for slice_g in range(0, total_slices, sw_batch_size):
+         slice_range = range(slice_g, min(slice_g + sw_batch_size, total_slices))
+         unravel_slice = [
+             [slice(int(idx / num_win), int(idx / num_win) + 1), slice(None)] + list(slices[idx % num_win])
+             for idx in slice_range
+         ]
+         window_data = torch.cat([inputs[win_slice] for win_slice in unravel_slice]).to(sw_device)
+         seg_logit, seg_dist = predictor(window_data)  # .to(device) # batched patch segmentation
+         seg_logit = torch.nn.functional.interpolate(seg_logit, size=roi_size, mode="bilinear", align_corners=False)
+         seg_logit = torch.softmax(seg_logit, dim=1)
+         seg_dist = torch.nn.functional.interpolate(seg_dist, size=roi_size, mode="bilinear", align_corners=False)
+         seg_dist = torch.sigmoid(seg_dist)
+
+         if not _initialized:  # init. buffer at the first iteration
+             output_classes = seg_logit.shape[1]
+             dist_class = seg_dist.shape[1]
+             output_shape = [batch_size, output_classes] + list(image_size)
+             output_dist_shape = [batch_size, dist_class] + list(image_size)
+             # allocate memory to store the full output and the count for overlapping parts
+             output_image = torch.zeros(output_shape, dtype=torch.float32, device=device)
+             output_dist = torch.zeros(output_dist_shape, dtype=torch.float32, device=device)
+             count_map = torch.zeros(output_shape, dtype=torch.float32, device=device)
+             count_dist_map = torch.zeros(output_dist_shape, dtype=torch.float32, device=device)
+             _initialized = True
+
+         # store the result in the proper location of the full output. Apply weights from importance map.
+         for idx, original_idx in zip(slice_range, unravel_slice):
+             output_image[original_idx] += importance_map * seg_logit[idx - slice_g]
+             output_dist[original_idx] += importance_map * seg_dist[idx - slice_g]
+             count_map[original_idx] += importance_map
+             count_dist_map[original_idx] += importance_map
+
+     # account for any overlapping sections
+     output_image = output_image / count_map
+     output_dist = output_dist / count_dist_map
+
+     final_slicing: List[slice] = []
+     for sp in range(num_spatial_dims):
+         slice_dim = slice(pad_size[sp * 2], image_size_[num_spatial_dims - sp - 1] + pad_size[sp * 2])
+         final_slicing.insert(0, slice_dim)
+     while len(final_slicing) < len(output_image.shape):
+         final_slicing.insert(0, slice(None))
+     return output_image[final_slicing], output_dist[final_slicing]
+
+
+
+ def _get_scan_interval(
+     image_size: Sequence[int], roi_size: Sequence[int], num_spatial_dims: int, overlap: float
+ ) -> Tuple[int, ...]:
+     """
+     Compute scan interval according to the image size, roi size and overlap.
+     Scan interval will be `int((1 - overlap) * roi_size)`, if interval is 0,
+     use 1 instead to make sure sliding window works.
+
+     """
+     if len(image_size) != num_spatial_dims:
+         raise ValueError("image coord different from spatial dims.")
+     if len(roi_size) != num_spatial_dims:
+         raise ValueError("roi coord different from spatial dims.")
+
+     scan_interval = []
+     for i in range(num_spatial_dims):
+         if roi_size[i] == image_size[i]:
+             scan_interval.append(int(roi_size[i]))
+         else:
+             interval = int(roi_size[i] * (1 - overlap))
+             scan_interval.append(interval if interval > 0 else 1)
+     return tuple(scan_interval)
+
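A minimal sketch of the two-head sliding-window call, using a dummy predictor in place of the UniCell network (torch and MONAI are assumed installed; shapes are illustrative):

# minimal sketch of the multi-task sliding-window API with a dummy two-head predictor
import torch
import torch.nn as nn
from utils.multi_task_sliding_window_inference import multi_task_sliding_window_inference

class DummyTwoHead(nn.Module):
    def forward(self, x):
        n, _, h, w = x.shape
        # per-window class logits (3 channels) and distance map (1 channel)
        return torch.randn(n, 3, h, w), torch.randn(n, 1, h, w)

img = torch.randn(1, 3, 600, 520)  # larger than the 256x256 window, so several tiles are blended
seg, dist = multi_task_sliding_window_inference(inputs=img, roi_size=(256, 256),
                                                sw_batch_size=4, predictor=DummyTwoHead())
print(seg.shape, dist.shape)  # (1, 3, 600, 520) and (1, 1, 600, 520)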
utils/postprocess.py ADDED
@@ -0,0 +1,125 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ """
+ Created on Thu Apr 7 10:51:48 2022
+
+ @author: jma
+ """
+
+ import numpy as np
+ from skimage import segmentation, measure, exposure, morphology
+ import scipy.ndimage as nd
+ from tqdm import tqdm
+ import skimage
+ import colorsys
+
+ def fill_holes(label_img, size=10, connectivity=1):
+     output_image = np.copy(label_img)
+     props = measure.regionprops(np.squeeze(label_img.astype('int')), cache=False)
+     for prop in props:
+         if prop.euler_number < 1:
+
+             patch = output_image[prop.slice]
+
+             filled = morphology.remove_small_holes(
+                 ar=(patch == prop.label),
+                 area_threshold=size,
+                 connectivity=connectivity)
+
+             output_image[prop.slice] = np.where(filled, prop.label, patch)
+
+     return output_image
+
+ def watershed_post(distmaps, interiors, dist_thre=0.1, interior_thre=0.2):
+     """
+     Parameters
+     ----------
+     distmaps : float (N, H, W), N is the number of images
+         distance transform map of the cells/nuclei, in [0, 1].
+     interiors : float (N, H, W)
+         interior map of the cells/nuclei, in [0, 1].
+
+     Returns
+     -------
+     label_images : uint (N, H, W)
+         cell/nucleus instance segmentation.
+
+     """
+
+     label_images = []
+     for maxima, interior in zip(distmaps, interiors):  # in interiors[0:num]:
+         interior = nd.gaussian_filter(interior.astype(np.float32), 2)
+         # find markers based on the distance map
+         if skimage.__version__ > '0.18.2':
+             markers = measure.label(morphology.h_maxima(image=maxima, h=dist_thre, footprint=morphology.disk(2)))
+         else:
+             markers = measure.label(morphology.h_maxima(image=maxima, h=dist_thre, selem=morphology.disk(2)))
+         # print('distmap marker num:', np.max(markers), 'interior marker num:', np.max(makers_interior))
+
+         label_image = segmentation.watershed(-1 * interior, markers,
+                                              mask=interior > interior_thre,  # 0.2/0.3
+                                              watershed_line=0)
+
+         label_image = morphology.remove_small_objects(label_image, min_size=15)
+         # fill in holes that lie completely within a segmentation label
+         label_image = fill_holes(label_image, size=15)
+
+         # Relabel the label image
+         label_image, _, _ = segmentation.relabel_sequential(label_image)
+         label_images.append(label_image)
+     label_images = np.stack(label_images, axis=0).astype(np.uint)
+     return label_images
+
+
+
+ def hsv_to_rgb(arr):
+     hsv_to_rgb_channels = np.vectorize(colorsys.hsv_to_rgb)
+     h, s, v = np.rollaxis(arr, axis=-1)
+     r, g, b = hsv_to_rgb_channels(h, s, v)
+     rgb = np.stack((r,g,b), axis=-1)
+     return rgb
+
+ def mask_overlay(img, masks):
+     """ overlay masks on image (set image to grayscale)
+     Adapted from https://github.com/MouseLand/cellpose/blob/06df602fbe074be02db3d716e280f0990816c726/cellpose/plot.py#L172
+     Parameters
+     ----------------
+
+     img: int or float, 2D or 3D array
+         img is of size [Ly x Lx (x nchan)]
+
+     masks: int, 2D array
+         masks where 0=NO masks; 1,2,...=mask labels
+
+     Returns
+     ----------------
+
+     RGB: uint8, 3D array
+         array of masks overlaid on grayscale image
+
+     """
+
+     if img.ndim>2:
+         img = img.astype(np.float32).mean(axis=-1)
+     else:
+         img = img.astype(np.float32)
+
+     HSV = np.zeros((img.shape[0], img.shape[1], 3), np.float32)
+     HSV[:,:,2] = np.clip((img / 255. if img.max() > 1 else img) * 1.5, 0, 1)
+     hues = np.linspace(0, 1, masks.max()+1)[np.random.permutation(masks.max())]
+     for n in range(int(masks.max())):
+         ipix = (masks==n+1).nonzero()
+         HSV[ipix[0],ipix[1],0] = hues[n]
+
+         HSV[ipix[0],ipix[1],1] = 1.0
+     RGB = (hsv_to_rgb(HSV) * 255).astype(np.uint8)
+     return RGB
+
+
+
+
+
+
+
+
+
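A minimal sketch of the watershed post-processing on synthetic distance/interior maps (illustrative inputs only; in the app these maps come from the network's two output heads):

# minimal sketch: watershed_post and mask_overlay on a synthetic two-cell image
import numpy as np
import scipy.ndimage as nd
from utils.postprocess import watershed_post, mask_overlay

interior = np.zeros((128, 128), dtype=np.float32)
interior[20:60, 20:60] = 1.0     # fake cell 1
interior[70:110, 70:110] = 1.0   # fake cell 2
dist = nd.distance_transform_edt(interior)
dist = dist / dist.max()         # normalized distance map in [0, 1]

labels = watershed_post(dist[None], interior[None])    # both inputs are (N, H, W)
print(labels.shape, labels.max())                       # (1, 128, 128), 2 instances expected
overlay = mask_overlay((interior * 255).astype(np.uint8), labels[0])
print(overlay.shape, overlay.dtype)                     # (128, 128, 3) uint8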