# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from collections import OrderedDict from copy import deepcopy from batchgenerators.augmentations.utils import resize_segmentation from nnunet.configuration import default_num_threads, RESAMPLING_SEPARATE_Z_ANISO_THRESHOLD from nnunet.preprocessing.cropping import get_case_identifier_from_npz, ImageCropper from skimage.transform import resize from scipy.ndimage.interpolation import map_coordinates import numpy as np from batchgenerators.utilities.file_and_folder_operations import * from multiprocessing.pool import Pool def get_do_separate_z(spacing, anisotropy_threshold=RESAMPLING_SEPARATE_Z_ANISO_THRESHOLD): do_separate_z = (np.max(spacing) / np.min(spacing)) > anisotropy_threshold return do_separate_z def get_lowres_axis(new_spacing): axis = np.where(max(new_spacing) / np.array(new_spacing) == 1)[0] # find which axis is anisotropic return axis def resample_patient(data, seg, original_spacing, target_spacing, order_data=3, order_seg=0, force_separate_z=False, order_z_data=0, order_z_seg=0, separate_z_anisotropy_threshold=RESAMPLING_SEPARATE_Z_ANISO_THRESHOLD): """ :param data: :param seg: :param original_spacing: :param target_spacing: :param order_data: :param order_seg: :param force_separate_z: if None then we dynamically decide how to resample along z, if True/False then always /never resample along z separately :param order_z_seg: only applies if do_separate_z is True :param order_z_data: only applies if do_separate_z is True :param separate_z_anisotropy_threshold: if max_spacing > separate_z_anisotropy_threshold * min_spacing (per axis) then resample along lowres axis with order_z_data/order_z_seg instead of order_data/order_seg :return: """ assert not ((data is None) and (seg is None)) if data is not None: assert len(data.shape) == 4, "data must be c x y z" if seg is not None: assert len(seg.shape) == 4, "seg must be c x y z" if data is not None: shape = np.array(data[0].shape) else: shape = np.array(seg[0].shape) new_shape = np.round(((np.array(original_spacing) / np.array(target_spacing)).astype(float) * shape)).astype(int) if force_separate_z is not None: do_separate_z = force_separate_z if force_separate_z: axis = get_lowres_axis(original_spacing) else: axis = None else: if get_do_separate_z(original_spacing, separate_z_anisotropy_threshold): do_separate_z = True axis = get_lowres_axis(original_spacing) elif get_do_separate_z(target_spacing, separate_z_anisotropy_threshold): do_separate_z = True axis = get_lowres_axis(target_spacing) else: do_separate_z = False axis = None if axis is not None: if len(axis) == 3: # every axis has the spacing, this should never happen, why is this code here? do_separate_z = False elif len(axis) == 2: # this happens for spacings like (0.24, 1.25, 1.25) for example. In that case we do not want to resample # separately in the out of plane axis do_separate_z = False else: pass if data is not None: data_reshaped = resample_data_or_seg(data, new_shape, False, axis, order_data, do_separate_z, order_z=order_z_data) else: data_reshaped = None if seg is not None: seg_reshaped = resample_data_or_seg(seg, new_shape, True, axis, order_seg, do_separate_z, order_z=order_z_seg) else: seg_reshaped = None return data_reshaped, seg_reshaped def resample_data_or_seg(data, new_shape, is_seg, axis=None, order=3, do_separate_z=False, order_z=0): """ separate_z=True will resample with order 0 along z :param data: :param new_shape: :param is_seg: :param axis: :param order: :param do_separate_z: :param cval: :param order_z: only applies if do_separate_z is True :return: """ assert len(data.shape) == 4, "data must be (c, x, y, z)" if is_seg: resize_fn = resize_segmentation kwargs = OrderedDict() else: resize_fn = resize kwargs = {'mode': 'edge', 'anti_aliasing': False} dtype_data = data.dtype shape = np.array(data[0].shape) new_shape = np.array(new_shape) if np.any(shape != new_shape): data = data.astype(float) if do_separate_z: print("separate z, order in z is", order_z, "order inplane is", order) assert len(axis) == 1, "only one anisotropic axis supported" axis = axis[0] if axis == 0: new_shape_2d = new_shape[1:] elif axis == 1: new_shape_2d = new_shape[[0, 2]] else: new_shape_2d = new_shape[:-1] reshaped_final_data = [] for c in range(data.shape[0]): reshaped_data = [] for slice_id in range(shape[axis]): if axis == 0: reshaped_data.append(resize_fn(data[c, slice_id], new_shape_2d, order, **kwargs)) elif axis == 1: reshaped_data.append(resize_fn(data[c, :, slice_id], new_shape_2d, order, **kwargs)) else: reshaped_data.append(resize_fn(data[c, :, :, slice_id], new_shape_2d, order, **kwargs)) reshaped_data = np.stack(reshaped_data, axis) if shape[axis] != new_shape[axis]: # The following few lines are blatantly copied and modified from sklearn's resize() rows, cols, dim = new_shape[0], new_shape[1], new_shape[2] orig_rows, orig_cols, orig_dim = reshaped_data.shape row_scale = float(orig_rows) / rows col_scale = float(orig_cols) / cols dim_scale = float(orig_dim) / dim map_rows, map_cols, map_dims = np.mgrid[:rows, :cols, :dim] map_rows = row_scale * (map_rows + 0.5) - 0.5 map_cols = col_scale * (map_cols + 0.5) - 0.5 map_dims = dim_scale * (map_dims + 0.5) - 0.5 coord_map = np.array([map_rows, map_cols, map_dims]) if not is_seg or order_z == 0: reshaped_final_data.append(map_coordinates(reshaped_data, coord_map, order=order_z, mode='nearest')[None]) else: unique_labels = np.unique(reshaped_data) reshaped = np.zeros(new_shape, dtype=dtype_data) for i, cl in enumerate(unique_labels): reshaped_multihot = np.round( map_coordinates((reshaped_data == cl).astype(float), coord_map, order=order_z, mode='nearest')) reshaped[reshaped_multihot > 0.5] = cl reshaped_final_data.append(reshaped[None]) else: reshaped_final_data.append(reshaped_data[None]) reshaped_final_data = np.vstack(reshaped_final_data) else: print("no separate z, order", order) reshaped = [] for c in range(data.shape[0]): reshaped.append(resize_fn(data[c], new_shape, order, **kwargs)[None]) reshaped_final_data = np.vstack(reshaped) return reshaped_final_data.astype(dtype_data) else: print("no resampling necessary") return data class GenericPreprocessor(object): def __init__(self, normalization_scheme_per_modality, use_nonzero_mask, transpose_forward: (tuple, list), intensityproperties=None): """ :param normalization_scheme_per_modality: dict {0:'nonCT'} :param use_nonzero_mask: {0:False} :param intensityproperties: """ self.transpose_forward = transpose_forward self.intensityproperties = intensityproperties self.normalization_scheme_per_modality = normalization_scheme_per_modality self.use_nonzero_mask = use_nonzero_mask self.resample_separate_z_anisotropy_threshold = RESAMPLING_SEPARATE_Z_ANISO_THRESHOLD @staticmethod def load_cropped(cropped_output_dir, case_identifier): all_data = np.load(os.path.join(cropped_output_dir, "%s.npz" % case_identifier))['data'] # TODO this is hardcoded does not work for 3D data data = all_data[:1].astype(np.float32) seg = all_data[1:] with open(os.path.join(cropped_output_dir, "%s.pkl" % case_identifier), 'rb') as f: properties = pickle.load(f) return data, seg, properties def resample_and_normalize(self, data, target_spacing, properties, seg=None, force_separate_z=None): """ data and seg must already have been transposed by transpose_forward. properties are the un-transposed values (spacing etc) :param data: :param target_spacing: :param properties: :param seg: :param force_separate_z: :return: """ # target_spacing is already transposed, properties["original_spacing"] is not so we need to transpose it! # data, seg are already transposed. Double check this using the properties original_spacing_transposed = np.array(properties["original_spacing"])[self.transpose_forward] before = { 'spacing': properties["original_spacing"], 'spacing_transposed': original_spacing_transposed, 'data.shape (data is transposed)': data.shape } # remove nans data[np.isnan(data)] = 0 data, seg = resample_patient(data, seg, np.array(original_spacing_transposed), target_spacing, 3, 1, force_separate_z=force_separate_z, order_z_data=0, order_z_seg=0, separate_z_anisotropy_threshold=self.resample_separate_z_anisotropy_threshold) after = { 'spacing': target_spacing, 'data.shape (data is resampled)': data.shape } print("before:", before, "\nafter: ", after, "\n") if seg is not None: # hippocampus 243 has one voxel with -2 as label. wtf? seg[seg < -1] = 0 properties["size_after_resampling"] = data[0].shape properties["spacing_after_resampling"] = target_spacing use_nonzero_mask = self.use_nonzero_mask assert len(self.normalization_scheme_per_modality) == len(data), "self.normalization_scheme_per_modality " \ "must have as many entries as data has " \ "modalities" assert len(self.use_nonzero_mask) == len(data), "self.use_nonzero_mask must have as many entries as data" \ " has modalities" for c in range(len(data)): scheme = self.normalization_scheme_per_modality[c] if scheme == "CT": # clip to lb and ub from train data foreground and use foreground mn and sd from training data assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" mean_intensity = self.intensityproperties[c]['mean'] std_intensity = self.intensityproperties[c]['sd'] lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] data[c] = np.clip(data[c], lower_bound, upper_bound) data[c] = (data[c] - mean_intensity) / std_intensity if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == "CT2": # clip to lb and ub from train data foreground, use mn and sd form each case for normalization assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] mask = (data[c] > lower_bound) & (data[c] < upper_bound) data[c] = np.clip(data[c], lower_bound, upper_bound) mn = data[c][mask].mean() sd = data[c][mask].std() data[c] = (data[c] - mn) / sd if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == 'noNorm': pass else: if use_nonzero_mask[c]: mask = seg[-1] >= 0 data[c][mask] = (data[c][mask] - data[c][mask].mean()) / (data[c][mask].std() + 1e-8) data[c][mask == 0] = 0 else: mn = data[c].mean() std = data[c].std() # print(data[c].shape, data[c].dtype, mn, std) data[c] = (data[c] - mn) / (std + 1e-8) return data, seg, properties def preprocess_test_case(self, data_files, target_spacing, seg_file=None, force_separate_z=None): data, seg, properties = ImageCropper.crop_from_list_of_files(data_files, seg_file) data = data.transpose((0, *[i + 1 for i in self.transpose_forward])) if not isinstance(seg, type(None)): seg = seg.transpose((0, *[i + 1 for i in self.transpose_forward])) data, seg, properties = self.resample_and_normalize(data, target_spacing, properties, seg, force_separate_z=force_separate_z) return data.astype(np.float32), seg, properties def _run_internal(self, target_spacing, case_identifier, output_folder_stage, cropped_output_dir, force_separate_z, all_classes): data, seg, properties = self.load_cropped(cropped_output_dir, case_identifier) data = data.transpose((0, *[i + 1 for i in self.transpose_forward])) seg = seg.transpose((0, *[i + 1 for i in self.transpose_forward])) data, seg, properties = self.resample_and_normalize(data, target_spacing, properties, seg, force_separate_z) all_data = np.vstack((data, seg)).astype(np.float32) # we need to find out where the classes are and sample some random locations # let's do 10.000 samples per class # seed this for reproducibility! num_samples = 10000 min_percent_coverage = 0.01 # at least 1% of the class voxels need to be selected, otherwise it may be too sparse rndst = np.random.RandomState(1234) class_locs = {} # TODO add second label (DONE) for i, labels in enumerate(all_classes): if not bool(labels): continue class_locs[i] = {} for c in labels: all_locs = np.argwhere(all_data[1+i] == c) if len(all_locs) == 0: class_locs[c] = [] continue target_num_samples = min(num_samples, len(all_locs)) target_num_samples = max(target_num_samples, int(np.ceil(len(all_locs) * min_percent_coverage))) selected = all_locs[rndst.choice(len(all_locs), target_num_samples, replace=False)] class_locs[i][c] = selected print(c, target_num_samples) properties['class_locations'] = class_locs print("saving: ", os.path.join(output_folder_stage, "%s.npz" % case_identifier)) np.savez_compressed(os.path.join(output_folder_stage, "%s.npz" % case_identifier), data=all_data.astype(np.float32)) with open(os.path.join(output_folder_stage, "%s.pkl" % case_identifier), 'wb') as f: pickle.dump(properties, f) def run(self, target_spacings, input_folder_with_cropped_npz, output_folder, data_identifier, num_threads=default_num_threads, force_separate_z=None): """ :param target_spacings: list of lists [[1.25, 1.25, 5]] :param input_folder_with_cropped_npz: dim: c, x, y, z | npz_file['data'] np.savez_compressed(fname.npz, data=arr) :param output_folder: :param num_threads: :param force_separate_z: None :return: """ print("Initializing to run preprocessing") print("npz folder:", input_folder_with_cropped_npz) print("output_folder:", output_folder) list_of_cropped_npz_files = subfiles(input_folder_with_cropped_npz, True, None, ".npz", True) maybe_mkdir_p(output_folder) num_stages = len(target_spacings) if not isinstance(num_threads, (list, tuple, np.ndarray)): num_threads = [num_threads] * num_stages assert len(num_threads) == num_stages # we need to know which classes are present in this dataset so that we can precompute where these classes are # located. This is needed for oversampling foreground all_classes = load_pickle(join(input_folder_with_cropped_npz, 'dataset_properties.pkl'))['all_classes'] for i in range(num_stages): all_args = [] output_folder_stage = os.path.join(output_folder, data_identifier + "_stage%d" % i) maybe_mkdir_p(output_folder_stage) spacing = target_spacings[i] for j, case in enumerate(list_of_cropped_npz_files): case_identifier = get_case_identifier_from_npz(case) args = spacing, case_identifier, output_folder_stage, input_folder_with_cropped_npz, force_separate_z, all_classes all_args.append(args) p = Pool(num_threads[i]) p.starmap(self._run_internal, all_args) p.close() p.join() class Preprocessor3DDifferentResampling(GenericPreprocessor): def resample_and_normalize(self, data, target_spacing, properties, seg=None, force_separate_z=None): """ data and seg must already have been transposed by transpose_forward. properties are the un-transposed values (spacing etc) :param data: :param target_spacing: :param properties: :param seg: :param force_separate_z: :return: """ # target_spacing is already transposed, properties["original_spacing"] is not so we need to transpose it! # data, seg are already transposed. Double check this using the properties original_spacing_transposed = np.array(properties["original_spacing"])[self.transpose_forward] before = { 'spacing': properties["original_spacing"], 'spacing_transposed': original_spacing_transposed, 'data.shape (data is transposed)': data.shape } # remove nans data[np.isnan(data)] = 0 data, seg = resample_patient(data, seg, np.array(original_spacing_transposed), target_spacing, 3, 1, force_separate_z=force_separate_z, order_z_data=3, order_z_seg=1, separate_z_anisotropy_threshold=self.resample_separate_z_anisotropy_threshold) after = { 'spacing': target_spacing, 'data.shape (data is resampled)': data.shape } print("before:", before, "\nafter: ", after, "\n") if seg is not None: # hippocampus 243 has one voxel with -2 as label. wtf? seg[seg < -1] = 0 properties["size_after_resampling"] = data[0].shape properties["spacing_after_resampling"] = target_spacing use_nonzero_mask = self.use_nonzero_mask assert len(self.normalization_scheme_per_modality) == len(data), "self.normalization_scheme_per_modality " \ "must have as many entries as data has " \ "modalities" assert len(self.use_nonzero_mask) == len(data), "self.use_nonzero_mask must have as many entries as data" \ " has modalities" for c in range(len(data)): scheme = self.normalization_scheme_per_modality[c] if scheme == "CT": # clip to lb and ub from train data foreground and use foreground mn and sd from training data assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" mean_intensity = self.intensityproperties[c]['mean'] std_intensity = self.intensityproperties[c]['sd'] lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] data[c] = np.clip(data[c], lower_bound, upper_bound) data[c] = (data[c] - mean_intensity) / std_intensity if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == "CT2": # clip to lb and ub from train data foreground, use mn and sd form each case for normalization assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] mask = (data[c] > lower_bound) & (data[c] < upper_bound) data[c] = np.clip(data[c], lower_bound, upper_bound) mn = data[c][mask].mean() sd = data[c][mask].std() data[c] = (data[c] - mn) / sd if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == 'noNorm': pass else: if use_nonzero_mask[c]: mask = seg[-1] >= 0 else: mask = np.ones(seg.shape[1:], dtype=bool) data[c][mask] = (data[c][mask] - data[c][mask].mean()) / (data[c][mask].std() + 1e-8) data[c][mask == 0] = 0 return data, seg, properties class Preprocessor3DBetterResampling(GenericPreprocessor): """ This preprocessor always uses force_separate_z=False. It does resampling to the target spacing with third order spline for data (just like GenericPreprocessor) and seg (unlike GenericPreprocessor). It never does separate resampling in z. """ def resample_and_normalize(self, data, target_spacing, properties, seg=None, force_separate_z=False): """ data and seg must already have been transposed by transpose_forward. properties are the un-transposed values (spacing etc) :param data: :param target_spacing: :param properties: :param seg: :param force_separate_z: :return: """ if force_separate_z is not False: print("WARNING: Preprocessor3DBetterResampling always uses force_separate_z=False. " "You specified %s. Your choice is overwritten" % str(force_separate_z)) force_separate_z = False # be safe assert force_separate_z is False # target_spacing is already transposed, properties["original_spacing"] is not so we need to transpose it! # data, seg are already transposed. Double check this using the properties original_spacing_transposed = np.array(properties["original_spacing"])[self.transpose_forward] before = { 'spacing': properties["original_spacing"], 'spacing_transposed': original_spacing_transposed, 'data.shape (data is transposed)': data.shape } # remove nans data[np.isnan(data)] = 0 data, seg = resample_patient(data, seg, np.array(original_spacing_transposed), target_spacing, 3, 3, force_separate_z=force_separate_z, order_z_data=99999, order_z_seg=99999, separate_z_anisotropy_threshold=self.resample_separate_z_anisotropy_threshold) after = { 'spacing': target_spacing, 'data.shape (data is resampled)': data.shape } print("before:", before, "\nafter: ", after, "\n") if seg is not None: # hippocampus 243 has one voxel with -2 as label. wtf? seg[seg < -1] = 0 properties["size_after_resampling"] = data[0].shape properties["spacing_after_resampling"] = target_spacing use_nonzero_mask = self.use_nonzero_mask assert len(self.normalization_scheme_per_modality) == len(data), "self.normalization_scheme_per_modality " \ "must have as many entries as data has " \ "modalities" assert len(self.use_nonzero_mask) == len(data), "self.use_nonzero_mask must have as many entries as data" \ " has modalities" for c in range(len(data)): scheme = self.normalization_scheme_per_modality[c] if scheme == "CT": # clip to lb and ub from train data foreground and use foreground mn and sd from training data assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" mean_intensity = self.intensityproperties[c]['mean'] std_intensity = self.intensityproperties[c]['sd'] lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] data[c] = np.clip(data[c], lower_bound, upper_bound) data[c] = (data[c] - mean_intensity) / std_intensity if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == "CT2": # clip to lb and ub from train data foreground, use mn and sd form each case for normalization assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] mask = (data[c] > lower_bound) & (data[c] < upper_bound) data[c] = np.clip(data[c], lower_bound, upper_bound) mn = data[c][mask].mean() sd = data[c][mask].std() data[c] = (data[c] - mn) / sd if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == 'noNorm': pass else: if use_nonzero_mask[c]: mask = seg[-1] >= 0 else: mask = np.ones(seg.shape[1:], dtype=bool) data[c][mask] = (data[c][mask] - data[c][mask].mean()) / (data[c][mask].std() + 1e-8) data[c][mask == 0] = 0 return data, seg, properties class PreprocessorFor2D(GenericPreprocessor): def __init__(self, normalization_scheme_per_modality, use_nonzero_mask, transpose_forward: (tuple, list), intensityproperties=None): super(PreprocessorFor2D, self).__init__(normalization_scheme_per_modality, use_nonzero_mask, transpose_forward, intensityproperties) def run(self, target_spacings, input_folder_with_cropped_npz, output_folder, data_identifier, num_threads=default_num_threads, force_separate_z=None): print("Initializing to run preprocessing") print("npz folder:", input_folder_with_cropped_npz) print("output_folder:", output_folder) list_of_cropped_npz_files = subfiles(input_folder_with_cropped_npz, True, None, ".npz", True) assert len(list_of_cropped_npz_files) != 0, "set list of files first" maybe_mkdir_p(output_folder) all_args = [] num_stages = len(target_spacings) # we need to know which classes are present in this dataset so that we can precompute where these classes are # located. This is needed for oversampling foreground all_classes = load_pickle(join(input_folder_with_cropped_npz, 'dataset_properties.pkl'))['all_classes'] for i in range(num_stages): output_folder_stage = os.path.join(output_folder, data_identifier + "_stage%d" % i) maybe_mkdir_p(output_folder_stage) spacing = target_spacings[i] for j, case in enumerate(list_of_cropped_npz_files): case_identifier = get_case_identifier_from_npz(case) args = spacing, case_identifier, output_folder_stage, input_folder_with_cropped_npz, force_separate_z, all_classes all_args.append(args) """ self._run_internal(spacing, case_identifier, output_folder_stage, input_folder_with_cropped_npz, force_separate_z, all_classes) """ p = Pool(num_threads) p.starmap(self._run_internal, all_args) p.close() p.join() def resample_and_normalize(self, data, target_spacing, properties, seg=None, force_separate_z=None): original_spacing_transposed = np.array(properties["original_spacing"])[self.transpose_forward] before = { 'spacing': properties["original_spacing"], 'spacing_transposed': original_spacing_transposed, 'data.shape (data is transposed)': data.shape } target_spacing[0] = original_spacing_transposed[0] data, seg = resample_patient(data, seg, np.array(original_spacing_transposed), target_spacing, 3, 1, force_separate_z=force_separate_z, order_z_data=0, order_z_seg=0, separate_z_anisotropy_threshold=self.resample_separate_z_anisotropy_threshold) after = { 'spacing': target_spacing, 'data.shape (data is resampled)': data.shape } print("before:", before, "\nafter: ", after, "\n") if seg is not None: # hippocampus 243 has one voxel with -2 as label. wtf? seg[seg < -1] = 0 properties["size_after_resampling"] = data[0].shape properties["spacing_after_resampling"] = target_spacing use_nonzero_mask = self.use_nonzero_mask assert len(self.normalization_scheme_per_modality) == len(data), "self.normalization_scheme_per_modality " \ "must have as many entries as data has " \ "modalities" assert len(self.use_nonzero_mask) == len(data), "self.use_nonzero_mask must have as many entries as data" \ " has modalities" print("normalization...") for c in range(len(data)): scheme = self.normalization_scheme_per_modality[c] if scheme == "CT": # clip to lb and ub from train data foreground and use foreground mn and sd from training data assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" mean_intensity = self.intensityproperties[c]['mean'] std_intensity = self.intensityproperties[c]['sd'] lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] data[c] = np.clip(data[c], lower_bound, upper_bound) data[c] = (data[c] - mean_intensity) / std_intensity if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == "CT2": # clip to lb and ub from train data foreground, use mn and sd form each case for normalization assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] mask = (data[c] > lower_bound) & (data[c] < upper_bound) data[c] = np.clip(data[c], lower_bound, upper_bound) mn = data[c][mask].mean() sd = data[c][mask].std() data[c] = (data[c] - mn) / sd if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == 'noNorm': pass else: if use_nonzero_mask[c]: mask = seg[-1] >= 0 else: if seg is not None: mask = np.ones(seg.shape[1:], dtype=bool) else: mask = np.ones(data[c].shape, dtype=bool) data[c][mask] = (data[c][mask] - data[c][mask].mean()) / (data[c][mask].std() + 1e-8) data[c][mask == 0] = 0 print("normalization done") return data, seg, properties class PreprocessorFor3D_LeaveOriginalZSpacing(GenericPreprocessor): """ 3d_lowres and 3d_fullres are not resampled along z! """ def resample_and_normalize(self, data, target_spacing, properties, seg=None, force_separate_z=None): """ if target_spacing[0] is None or nan we use original_spacing_transposed[0] (no resampling along z) :param data: :param target_spacing: :param properties: :param seg: :param force_separate_z: :return: """ original_spacing_transposed = np.array(properties["original_spacing"])[self.transpose_forward] before = { 'spacing': properties["original_spacing"], 'spacing_transposed': original_spacing_transposed, 'data.shape (data is transposed)': data.shape } # remove nans data[np.isnan(data)] = 0 target_spacing = deepcopy(target_spacing) if target_spacing[0] is None or np.isnan(target_spacing[0]): target_spacing[0] = original_spacing_transposed[0] #print(target_spacing, original_spacing_transposed) data, seg = resample_patient(data, seg, np.array(original_spacing_transposed), target_spacing, 3, 1, force_separate_z=force_separate_z, order_z_data=0, order_z_seg=0, separate_z_anisotropy_threshold=self.resample_separate_z_anisotropy_threshold) after = { 'spacing': target_spacing, 'data.shape (data is resampled)': data.shape } st = "before:" + str(before) + '\nafter' + str(after) + "\n" print(st) if seg is not None: # hippocampus 243 has one voxel with -2 as label. wtf? seg[seg < -1] = 0 properties["size_after_resampling"] = data[0].shape properties["spacing_after_resampling"] = target_spacing use_nonzero_mask = self.use_nonzero_mask assert len(self.normalization_scheme_per_modality) == len(data), "self.normalization_scheme_per_modality " \ "must have as many entries as data has " \ "modalities" assert len(self.use_nonzero_mask) == len(data), "self.use_nonzero_mask must have as many entries as data" \ " has modalities" for c in range(len(data)): scheme = self.normalization_scheme_per_modality[c] if scheme == "CT": # clip to lb and ub from train data foreground and use foreground mn and sd from training data assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" mean_intensity = self.intensityproperties[c]['mean'] std_intensity = self.intensityproperties[c]['sd'] lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] data[c] = np.clip(data[c], lower_bound, upper_bound) data[c] = (data[c] - mean_intensity) / std_intensity if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == "CT2": # clip to lb and ub from train data foreground, use mn and sd form each case for normalization assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] mask = (data[c] > lower_bound) & (data[c] < upper_bound) data[c] = np.clip(data[c], lower_bound, upper_bound) mn = data[c][mask].mean() sd = data[c][mask].std() data[c] = (data[c] - mn) / sd if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == 'noNorm': pass else: if use_nonzero_mask[c]: mask = seg[-1] >= 0 else: mask = np.ones(seg.shape[1:], dtype=bool) data[c][mask] = (data[c][mask] - data[c][mask].mean()) / (data[c][mask].std() + 1e-8) data[c][mask == 0] = 0 return data, seg, properties def run(self, target_spacings, input_folder_with_cropped_npz, output_folder, data_identifier, num_threads=default_num_threads, force_separate_z=None): for i in range(len(target_spacings)): target_spacings[i][0] = None super().run(target_spacings, input_folder_with_cropped_npz, output_folder, data_identifier, default_num_threads, force_separate_z) class PreprocessorFor3D_NoResampling(GenericPreprocessor): def resample_and_normalize(self, data, target_spacing, properties, seg=None, force_separate_z=None): """ if target_spacing[0] is None or nan we use original_spacing_transposed[0] (no resampling along z) :param data: :param target_spacing: :param properties: :param seg: :param force_separate_z: :return: """ original_spacing_transposed = np.array(properties["original_spacing"])[self.transpose_forward] before = { 'spacing': properties["original_spacing"], 'spacing_transposed': original_spacing_transposed, 'data.shape (data is transposed)': data.shape } # remove nans data[np.isnan(data)] = 0 target_spacing = deepcopy(original_spacing_transposed) #print(target_spacing, original_spacing_transposed) data, seg = resample_patient(data, seg, np.array(original_spacing_transposed), target_spacing, 3, 1, force_separate_z=force_separate_z, order_z_data=0, order_z_seg=0, separate_z_anisotropy_threshold=self.resample_separate_z_anisotropy_threshold) after = { 'spacing': target_spacing, 'data.shape (data is resampled)': data.shape } st = "before:" + str(before) + '\nafter' + str(after) + "\n" print(st) if seg is not None: # hippocampus 243 has one voxel with -2 as label. wtf? seg[seg < -1] = 0 properties["size_after_resampling"] = data[0].shape properties["spacing_after_resampling"] = target_spacing use_nonzero_mask = self.use_nonzero_mask assert len(self.normalization_scheme_per_modality) == len(data), "self.normalization_scheme_per_modality " \ "must have as many entries as data has " \ "modalities" assert len(self.use_nonzero_mask) == len(data), "self.use_nonzero_mask must have as many entries as data" \ " has modalities" for c in range(len(data)): scheme = self.normalization_scheme_per_modality[c] if scheme == "CT": # clip to lb and ub from train data foreground and use foreground mn and sd from training data assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" mean_intensity = self.intensityproperties[c]['mean'] std_intensity = self.intensityproperties[c]['sd'] lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] data[c] = np.clip(data[c], lower_bound, upper_bound) data[c] = (data[c] - mean_intensity) / std_intensity if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == "CT2": # clip to lb and ub from train data foreground, use mn and sd form each case for normalization assert self.intensityproperties is not None, "ERROR: if there is a CT then we need intensity properties" lower_bound = self.intensityproperties[c]['percentile_00_5'] upper_bound = self.intensityproperties[c]['percentile_99_5'] mask = (data[c] > lower_bound) & (data[c] < upper_bound) data[c] = np.clip(data[c], lower_bound, upper_bound) mn = data[c][mask].mean() sd = data[c][mask].std() data[c] = (data[c] - mn) / sd if use_nonzero_mask[c]: data[c][seg[-1] < 0] = 0 elif scheme == 'noNorm': pass else: if use_nonzero_mask[c]: mask = seg[-1] >= 0 else: mask = np.ones(seg.shape[1:], dtype=bool) data[c][mask] = (data[c][mask] - data[c][mask].mean()) / (data[c][mask].std() + 1e-8) data[c][mask == 0] = 0 return data, seg, properties class PreprocessorFor2D_noNormalization(GenericPreprocessor): def resample_and_normalize(self, data, target_spacing, properties, seg=None, force_separate_z=None): original_spacing_transposed = np.array(properties["original_spacing"])[self.transpose_forward] before = { 'spacing': properties["original_spacing"], 'spacing_transposed': original_spacing_transposed, 'data.shape (data is transposed)': data.shape } target_spacing[0] = original_spacing_transposed[0] data, seg = resample_patient(data, seg, np.array(original_spacing_transposed), target_spacing, 3, 1, force_separate_z=force_separate_z, order_z_data=0, order_z_seg=0, separate_z_anisotropy_threshold=self.resample_separate_z_anisotropy_threshold) after = { 'spacing': target_spacing, 'data.shape (data is resampled)': data.shape } print("before:", before, "\nafter: ", after, "\n") if seg is not None: # hippocampus 243 has one voxel with -2 as label. wtf? seg[seg < -1] = 0 properties["size_after_resampling"] = data[0].shape properties["spacing_after_resampling"] = target_spacing use_nonzero_mask = self.use_nonzero_mask assert len(self.normalization_scheme_per_modality) == len(data), "self.normalization_scheme_per_modality " \ "must have as many entries as data has " \ "modalities" assert len(self.use_nonzero_mask) == len(data), "self.use_nonzero_mask must have as many entries as data" \ " has modalities" return data, seg, properties