# -*- coding: utf-8 -*- # Prepare MoNuSeg Dataset By converting and resorting files # # @ Fabian Hörst, fabian.hoerst@uk-essen.de # Institute for Artifical Intelligence in Medicine, # University Medicine Essen import inspect import os import sys currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) parentdir = os.path.dirname(currentdir) sys.path.insert(0, parentdir) parentdir = os.path.dirname(parentdir) sys.path.insert(0, parentdir) import numpy as np from pathlib import Path from PIL import Image from tqdm import tqdm import argparse from cell_segmentation.utils.metrics import remap_label def process_fold(fold, input_path, output_path) -> None: fold_path = Path(input_path) / f"fold{fold}" output_fold_path = Path(output_path) / f"fold{fold}" output_fold_path.mkdir(exist_ok=True, parents=True) (output_fold_path / "images").mkdir(exist_ok=True, parents=True) (output_fold_path / "labels").mkdir(exist_ok=True, parents=True) print(f"Fold: {fold}") print("Loading large numpy files, this may take a while") images = np.load(fold_path / "images.npy") masks = np.load(fold_path / "masks.npy") print("Process images") for i in tqdm(range(len(images)), total=len(images)): outname = f"{fold}_{i}.png" out_img = images[i] im = Image.fromarray(out_img.astype(np.uint8)) im.save(output_fold_path / "images" / outname) print("Process masks") for i in tqdm(range(len(images)), total=len(images)): outname = f"{fold}_{i}.npy" # need to create instance map and type map with shape 256x256 mask = masks[i] inst_map = np.zeros((256, 256)) num_nuc = 0 for j in range(5): # copy value from new array if value is not equal 0 layer_res = remap_label(mask[:, :, j]) # inst_map = np.where(mask[:,:,j] != 0, mask[:,:,j], inst_map) inst_map = np.where(layer_res != 0, layer_res + num_nuc, inst_map) num_nuc = num_nuc + np.max(layer_res) inst_map = remap_label(inst_map) type_map = np.zeros((256, 256)).astype(np.int32) for j in range(5): layer_res = ((j + 1) * np.clip(mask[:, :, j], 0, 1)).astype(np.int32) type_map = np.where(layer_res != 0, layer_res, type_map) outdict = {"inst_map": inst_map, "type_map": type_map} np.save(output_fold_path / "labels" / outname, outdict) parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Perform CellViT inference for given run-directory with model checkpoints and logs", ) parser.add_argument( "--input_path", type=str, help="Input path of the original PanNuke dataset", required=True, ) parser.add_argument( "--output_path", type=str, help="Output path to store the processed PanNuke dataset", required=True, ) if __name__ == "__main__": opt = parser.parse_args() configuration = vars(opt) input_path = Path(configuration["input_path"]) output_path = Path(configuration["output_path"]) for fold in [0, 1, 2]: process_fold(fold, input_path, output_path)