|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from collections import OrderedDict |
|
import numpy as np |
|
from multiprocessing import Pool |
|
|
|
from batchgenerators.dataloading.data_loader import SlimDataLoaderBase |
|
|
|
from nnunet.configuration import default_num_threads |
|
from nnunet.paths import preprocessing_output_dir |
|
from batchgenerators.utilities.file_and_folder_operations import * |
|
|
|
|
|
def get_case_identifiers(folder):
    """Return the case identifiers of all preprocessed cases in ``folder``.

    A case identifier is the filename of a ``.npz`` file with its extension
    stripped. Files belonging to a previous cascade stage
    (``segFromPrevStage``) are skipped.
    """
    identifiers = []
    for fname in os.listdir(folder):
        if fname.endswith("npz") and "segFromPrevStage" not in fname:
            identifiers.append(fname[:-4])
    return identifiers
|
|
|
|
|
def get_case_identifiers_from_raw_folder(folder):
    """Return the unique case identifiers found in a raw data folder.

    Strips the trailing 12 characters (the ``_XXXX.nii.gz`` modality suffix)
    from every ``.nii.gz`` file, ignoring ``segFromPrevStage`` files, and
    deduplicates across modalities.
    """
    identifiers = [fname[:-12] for fname in os.listdir(folder)
                   if fname.endswith(".nii.gz") and "segFromPrevStage" not in fname]
    return np.unique(identifiers)
|
|
|
|
|
def convert_to_npy(args):
    """Unpack a single ``.npz`` archive into an uncompressed ``.npy`` file.

    ``args`` is either the npz path alone (the array is read from key
    ``'data'``) or a ``(npz_path, key)`` tuple. The npy file is written next
    to the npz; nothing happens if it already exists.
    """
    if isinstance(args, tuple):
        npz_file, key = args
    else:
        npz_file, key = args, "data"
    target = npz_file[:-3] + "npy"
    if not isfile(target):
        np.save(target, np.load(npz_file)[key])
|
|
|
|
|
def save_as_npz(args):
    """Compress a single ``.npy`` file back into a ``.npz`` archive.

    ``args`` is either the npy path alone (stored under key ``'data'``) or a
    ``(npy_path, key)`` tuple. The npz is written next to the npy and any
    existing archive of the same name is overwritten.
    """
    if isinstance(args, tuple):
        npy_file, key = args
    else:
        npy_file, key = args, "data"
    arr = np.load(npy_file)
    np.savez_compressed(npy_file[:-3] + "npz", **{key: arr})
|
|
|
|
|
def unpack_dataset(folder, threads=default_num_threads, key="data"):
    """
    Unpacks all npz files in a folder to npy (whatever you want to have unpacked
    must be saved under key).

    :param folder: folder containing the preprocessed .npz files
    :param threads: number of worker processes used for unpacking
    :param key: key under which the array is stored inside each npz archive
    :return: None
    """
    npz_files = subfiles(folder, True, None, ".npz", True)
    # Use the pool as a context manager: the original close()/join() sequence
    # leaked the worker processes if convert_to_npy raised in any worker.
    with Pool(threads) as p:
        p.map(convert_to_npy, zip(npz_files, [key] * len(npz_files)))
|
|
|
|
|
|
|
def pack_dataset(folder, threads=default_num_threads, key="data"):
    """Inverse of unpack_dataset: recompress all .npy files in folder to .npz.

    :param folder: folder containing the unpacked .npy files
    :param threads: number of worker processes used for compression
    :param key: key under which each array is stored inside its npz archive
    :return: None
    """
    npy_files = subfiles(folder, True, None, ".npy", True)
    # Context manager guarantees worker cleanup even if save_as_npz raises
    # (the original close()/join() pair was skipped on exceptions).
    with Pool(threads) as p:
        p.map(save_as_npz, zip(npy_files, [key] * len(npy_files)))
|
|
|
|
|
def delete_npy(folder):
    """Delete the unpacked ``.npy`` files of all cases in ``folder``.

    The compressed ``.npz`` archives are left untouched, so the dataset can
    be unpacked again later.
    """
    for case_id in get_case_identifiers(folder):
        npy_path = join(folder, case_id + ".npy")
        if isfile(npy_path):
            os.remove(npy_path)
|
|
|
|
|
def load_dataset(folder, num_cases_properties_loading_threshold=1000):
    """Build the dataset dict consumed by DataLoader2D/3D.

    Maps each case identifier to its npz data file, its pickled properties
    file and (if present on disk) the segmentation exported by the previous
    cascade stage. For small datasets (at most
    num_cases_properties_loading_threshold cases) the properties pickles are
    eagerly loaded into memory; larger datasets load them lazily per batch.

    :param folder: preprocessed data folder (contains %s.npz / %s.pkl files)
    :param num_cases_properties_loading_threshold: eager-loading cutoff
    :return: OrderedDict case_id -> OrderedDict of file paths (+ 'properties')
    """
    print('loading dataset')
    case_identifiers = get_case_identifiers(folder)
    case_identifiers.sort()

    dataset = OrderedDict()
    for c in case_identifiers:
        dataset[c] = OrderedDict()
        dataset[c]['data_file'] = join(folder, "%s.npz" % c)
        dataset[c]['properties_file'] = join(folder, "%s.pkl" % c)
        # Bug fix: the old check `dataset[c].get('seg_from_prev_stage_file')
        # is not None` could never be true because dataset[c] was just created
        # with only the two keys above, so the previous-stage segmentation was
        # never registered and DataLoader3D(has_prev_stage=True) raised a
        # KeyError. Register the file whenever it actually exists on disk.
        seg_prev = join(folder, "%s_segs.npz" % c)
        if isfile(seg_prev):
            dataset[c]['seg_from_prev_stage_file'] = seg_prev

    if len(case_identifiers) <= num_cases_properties_loading_threshold:
        print('loading all case properties')
        for i in dataset.keys():
            dataset[i]['properties'] = load_pickle(dataset[i]['properties_file'])

    return dataset
|
|
|
|
|
def crop_2D_image_force_fg(img, crop_size, valid_voxels):
    """
    Crop a 2D multi-channel image so the crop is centered on a foreground voxel
    (when one is available).

    img must be [c, x, y]
    img[-1] must be the segmentation with segmentation>0 being foreground
    :param img: [c, x, y] array
    :param crop_size: int (isotropic) or per-axis list/tuple of length 2
    :param valid_voxels: (N, 2) array of voxel coordinates belonging to the selected class
    :return: cropped array of shape [c, crop_size[0], crop_size[1]]
    """
    assert len(valid_voxels.shape) == 2

    if type(crop_size) not in (tuple, list):
        crop_size = [crop_size] * (len(img.shape) - 1)
    else:
        assert len(crop_size) == (len(
            img.shape) - 1), "If you provide a list/tuple as center crop make sure it has the same len as your data has dims (3d)"

    # valid range for the crop center along each spatial axis so that the crop
    # stays inside the image
    lb_x = crop_size[0] // 2
    ub_x = img.shape[1] - crop_size[0] // 2 - crop_size[0] % 2
    lb_y = crop_size[1] // 2
    ub_y = img.shape[2] - crop_size[1] // 2 - crop_size[1] % 2

    if len(valid_voxels) == 0:
        # no foreground voxels supplied: pick a random valid center.
        # np.random.random_integers (inclusive bounds) was deprecated and then
        # removed from numpy; randint with [lb, ub + 1) is the exact equivalent.
        selected_center_voxel = (np.random.randint(lb_x, ub_x + 1),
                                 np.random.randint(lb_y, ub_y + 1))
    else:
        # pick one candidate voxel (one ROW of valid_voxels). The original used
        # np.random.choice(valid_voxels.shape[1]), which counts columns (always
        # 2) rather than candidates - an off-by-axis bug that both truncated
        # the selection to the first two voxels and crashed for N == 1.
        selected_center_voxel = valid_voxels[np.random.choice(valid_voxels.shape[0]), :]

    selected_center_voxel = np.array(selected_center_voxel)
    # clamp the chosen center so the resulting crop never leaves the image
    for i in range(2):
        selected_center_voxel[i] = max(crop_size[i] // 2, selected_center_voxel[i])
        selected_center_voxel[i] = min(img.shape[i + 1] - crop_size[i] // 2 - crop_size[i] % 2,
                                       selected_center_voxel[i])

    result = img[:, (selected_center_voxel[0] - crop_size[0] // 2):(
            selected_center_voxel[0] + crop_size[0] // 2 + crop_size[0] % 2),
             (selected_center_voxel[1] - crop_size[1] // 2):(
                     selected_center_voxel[1] + crop_size[1] // 2 + crop_size[1] % 2)]
    return result
|
|
|
|
|
class DataLoader3D(SlimDataLoaderBase):
    def __init__(self, data, patch_size, final_patch_size, batch_size, has_prev_stage=False,
                 oversample_foreground_percent=0.0, memmap_mode="r", pad_mode="edge", pad_kwargs_data=None,
                 pad_sides=None):
        """
        This is the basic data loader for 3D networks. It uses preprocessed data as produced by my (Fabian) preprocessing.
        You can load the data with load_dataset(folder) where folder is the folder where the npz files are located. If there
        are only npz files present in that folder, the data loader will unpack them on the fly. This may take a while
        and increase CPU usage. Therefore, I advise you to call unpack_dataset(folder) first, which will unpack all npz
        to npy. Don't forget to call delete_npy(folder) after you are done with training?
        Why all the hassle? Well the decathlon dataset is huge. Using npy for everything will consume >1 TB and that is uncool
        given that I (Fabian) will have to store that permanently on /datasets and my local computer. With this strategy all
        data is stored in a compressed format (factor 10 smaller) and only unpacked when needed.
        :param data: get this with load_dataset(folder, stage=0). Plug the return value in here and you are g2g (good to go)
        :param patch_size: what patch size will this data loader return? it is common practice to first load larger
        patches so that a central crop after data augmentation can be done to reduce border artifacts. If unsure, use
        get_patch_size() from data_augmentation.default_data_augmentation
        :param final_patch_size: what will the patch finally be cropped to (after data augmentation)? this is the patch
        size that goes into your network. We need this here because we will pad patients in here so that patches at the
        border of patients are sampled properly
        :param batch_size:
        :param has_prev_stage: set True for cascade training; each case then also loads the previous stage's
        segmentation (key 'seg_from_prev_stage_file') into a second seg channel
        :param oversample_foreground_percent: fraction of each batch (taken from the END of the batch) that is
        forced to contain at least some foreground (equal prob for each of the foreground classes)
        :param memmap_mode: mmap_mode passed to np.load for unpacked npy files
        :param pad_mode: np.pad mode used for the image data (seg is always padded with -1)
        :param pad_kwargs_data: extra kwargs for np.pad of the image data
        :param pad_sides: optional extra padding added per axis on top of patch_size - final_patch_size
        """
        super(DataLoader3D, self).__init__(data, batch_size, None)
        if pad_kwargs_data is None:
            pad_kwargs_data = OrderedDict()
        self.pad_kwargs_data = pad_kwargs_data
        self.pad_mode = pad_mode
        self.oversample_foreground_percent = oversample_foreground_percent
        self.final_patch_size = final_patch_size
        self.has_prev_stage = has_prev_stage
        self.patch_size = patch_size
        self.list_of_keys = list(self._data.keys())
        # extra room needed around the final patch so that augmentation can
        # centrally crop back to final_patch_size without border artifacts
        self.need_to_pad = (np.array(patch_size) - np.array(final_patch_size)).astype(int)
        if pad_sides is not None:
            if not isinstance(pad_sides, np.ndarray):
                pad_sides = np.array(pad_sides)
            self.need_to_pad += pad_sides
        self.memmap_mode = memmap_mode
        self.num_channels = None
        self.pad_sides = pad_sides
        self.data_shape, self.seg_shape = self.determine_shapes()

    def get_do_oversample(self, batch_idx):
        # the LAST ceil(batch_size * oversample_foreground_percent) samples of
        # each batch are the ones forced to contain foreground
        return not batch_idx < round(self.batch_size * (1 - self.oversample_foreground_percent))

    def determine_shapes(self):
        """Peek at one case to derive the (batch, channel, x, y, z) shapes of
        the data and seg arrays returned by generate_train_batch."""
        if self.has_prev_stage:
            # channel 0: ground truth, channel 1: previous stage seg
            # (channel 2 is reserved; only 0 and 1 are filled in this loader)
            num_seg = 3
        else:
            num_seg = 1

        k = list(self._data.keys())[0]
        # prefer the unpacked npy (memmapped) over decompressing the npz
        if isfile(self._data[k]['data_file'][:-4] + ".npy"):
            case_all_data = np.load(self._data[k]['data_file'][:-4] + ".npy", self.memmap_mode)
        else:
            case_all_data = np.load(self._data[k]['data_file'])['data']
        # last channel of the stored array is the segmentation
        num_color_channels = case_all_data.shape[0] - 1
        data_shape = (self.batch_size, num_color_channels, *self.patch_size)
        seg_shape = (self.batch_size, num_seg, *self.patch_size)
        return data_shape, seg_shape

    def generate_train_batch(self):
        """Sample batch_size random cases and cut one (possibly padded) 3D
        patch from each. Returns dict with 'data', 'seg', 'properties', 'keys'.
        """
        selected_keys = np.random.choice(self.list_of_keys, self.batch_size, True, None)
        data = np.zeros(self.data_shape, dtype=np.float32)
        seg = np.zeros(self.seg_shape, dtype=np.float32)
        case_properties = []
        for j, i in enumerate(selected_keys):
            # samples at the end of the batch are forced to contain foreground
            if self.get_do_oversample(j):
                force_fg = True
            else:
                force_fg = False

            # properties may already be in memory (small datasets) or must be
            # loaded lazily from the pickle
            if 'properties' in self._data[i].keys():
                properties = self._data[i]['properties']
            else:
                properties = load_pickle(self._data[i]['properties_file'])
            case_properties.append(properties)

            # prefer the unpacked, memmapped npy; fall back to decompressing npz
            if isfile(self._data[i]['data_file'][:-4] + ".npy"):
                case_all_data = np.load(self._data[i]['data_file'][:-4] + ".npy", self.memmap_mode)
            else:
                case_all_data = np.load(self._data[i]['data_file'])['data']

            # cascade: also load the previous stage's segmentation for this case
            if self.has_prev_stage:
                if isfile(self._data[i]['seg_from_prev_stage_file'][:-4] + ".npy"):
                    segs_from_previous_stage = np.load(self._data[i]['seg_from_prev_stage_file'][:-4] + ".npy",
                                                       mmap_mode=self.memmap_mode)[None]
                else:
                    segs_from_previous_stage = np.load(self._data[i]['seg_from_prev_stage_file'])['data'][None]

                # pick one of the stored previous-stage segmentations at random
                seg_key = np.random.choice(segs_from_previous_stage.shape[0])
                seg_from_previous_stage = segs_from_previous_stage[seg_key:seg_key + 1]
                assert all([i == j for i, j in zip(seg_from_previous_stage.shape[1:], case_all_data.shape[1:])]), \
                    "seg_from_previous_stage does not match the shape of case_all_data: %s vs %s" % \
                    (str(seg_from_previous_stage.shape[1:]), str(case_all_data.shape[1:]))
            else:
                seg_from_previous_stage = None

            # If the case is smaller than patch_size along an axis, pad enough
            # so that at least one full patch fits.
            need_to_pad = self.need_to_pad.copy()
            for d in range(3):
                if need_to_pad[d] + case_all_data.shape[d + 1] < self.patch_size[d]:
                    need_to_pad[d] = self.patch_size[d] - case_all_data.shape[d + 1]

            # Valid range for the patch's lower-bound corner per axis. Bounds
            # may be negative / exceed the image: out-of-image parts are later
            # realized via np.pad rather than by padding the whole case.
            shape = case_all_data.shape[1:]
            lb_x = - need_to_pad[0] // 2
            ub_x = shape[0] + need_to_pad[0] // 2 + need_to_pad[0] % 2 - self.patch_size[0]
            lb_y = - need_to_pad[1] // 2
            ub_y = shape[1] + need_to_pad[1] // 2 + need_to_pad[1] % 2 - self.patch_size[1]
            lb_z = - need_to_pad[2] // 2
            ub_z = shape[2] + need_to_pad[2] // 2 + need_to_pad[2] % 2 - self.patch_size[2]

            if not force_fg:
                # unconstrained random patch position
                bbox_x_lb = np.random.randint(lb_x, ub_x + 1)
                bbox_y_lb = np.random.randint(lb_y, ub_y + 1)
                bbox_z_lb = np.random.randint(lb_z, ub_z + 1)
            else:
                # foreground oversampling needs precomputed voxel locations
                if 'class_locations' not in properties.keys():
                    raise RuntimeError("Please rerun the preprocessing with the newest version of nnU-Net!")

                # classes (>0) that actually have voxels in this case
                foreground_classes = np.array(
                    [i for i in properties['class_locations'].keys() if len(properties['class_locations'][i]) != 0])
                foreground_classes = foreground_classes[foreground_classes > 0]

                if len(foreground_classes) == 0:
                    # fully-background case: fall back to random sampling below
                    selected_class = None
                    voxels_of_that_class = None
                    print('case does not contain any foreground classes', i)
                else:
                    selected_class = np.random.choice(foreground_classes)
                    voxels_of_that_class = properties['class_locations'][selected_class]

                if voxels_of_that_class is not None:
                    selected_voxel = voxels_of_that_class[np.random.choice(len(voxels_of_that_class))]
                    # center the patch on the chosen voxel, but clamp to the
                    # valid lower-bound range so the patch position stays legal
                    bbox_x_lb = max(lb_x, selected_voxel[0] - self.patch_size[0] // 2)
                    bbox_y_lb = max(lb_y, selected_voxel[1] - self.patch_size[1] // 2)
                    bbox_z_lb = max(lb_z, selected_voxel[2] - self.patch_size[2] // 2)
                else:
                    bbox_x_lb = np.random.randint(lb_x, ub_x + 1)
                    bbox_y_lb = np.random.randint(lb_y, ub_y + 1)
                    bbox_z_lb = np.random.randint(lb_z, ub_z + 1)

            bbox_x_ub = bbox_x_lb + self.patch_size[0]
            bbox_y_ub = bbox_y_lb + self.patch_size[1]
            bbox_z_ub = bbox_z_lb + self.patch_size[2]

            # intersect the (possibly out-of-image) bbox with the image; the
            # remainder is added back below with np.pad, which is cheaper than
            # padding the whole (memmapped) case up front
            valid_bbox_x_lb = max(0, bbox_x_lb)
            valid_bbox_x_ub = min(shape[0], bbox_x_ub)
            valid_bbox_y_lb = max(0, bbox_y_lb)
            valid_bbox_y_ub = min(shape[1], bbox_y_ub)
            valid_bbox_z_lb = max(0, bbox_z_lb)
            valid_bbox_z_ub = min(shape[2], bbox_z_ub)

            # np.copy materializes the crop from the memmap so that padding /
            # augmentation downstream never touches the file-backed array
            case_all_data = np.copy(case_all_data[:, valid_bbox_x_lb:valid_bbox_x_ub,
                                    valid_bbox_y_lb:valid_bbox_y_ub,
                                    valid_bbox_z_lb:valid_bbox_z_ub])
            if seg_from_previous_stage is not None:
                seg_from_previous_stage = seg_from_previous_stage[:, valid_bbox_x_lb:valid_bbox_x_ub,
                                          valid_bbox_y_lb:valid_bbox_y_ub,
                                          valid_bbox_z_lb:valid_bbox_z_ub]

            # image channels: pad with pad_mode (default 'edge')
            data[j] = np.pad(case_all_data[:-1], ((0, 0),
                                                  (-min(0, bbox_x_lb), max(bbox_x_ub - shape[0], 0)),
                                                  (-min(0, bbox_y_lb), max(bbox_y_ub - shape[1], 0)),
                                                  (-min(0, bbox_z_lb), max(bbox_z_ub - shape[2], 0))),
                             self.pad_mode, **self.pad_kwargs_data)

            # seg channel 0: ground truth, padded with -1 (= "outside patient")
            seg[j, 0] = np.pad(case_all_data[-1:], ((0, 0),
                                                    (-min(0, bbox_x_lb), max(bbox_x_ub - shape[0], 0)),
                                                    (-min(0, bbox_y_lb), max(bbox_y_ub - shape[1], 0)),
                                                    (-min(0, bbox_z_lb), max(bbox_z_ub - shape[2], 0))),
                               'constant', **{'constant_values': -1})
            # seg channel 1 (cascade only): previous stage seg, padded with 0
            if seg_from_previous_stage is not None:
                seg[j, 1] = np.pad(seg_from_previous_stage, ((0, 0),
                                                             (-min(0, bbox_x_lb),
                                                              max(bbox_x_ub - shape[0], 0)),
                                                             (-min(0, bbox_y_lb),
                                                              max(bbox_y_ub - shape[1], 0)),
                                                             (-min(0, bbox_z_lb),
                                                              max(bbox_z_ub - shape[2], 0))),
                                   'constant', **{'constant_values': 0})

        return {'data': data, 'seg': seg, 'properties': case_properties, 'keys': selected_keys}
|
|
|
|
|
class DataLoader2D(SlimDataLoaderBase):
    def __init__(self, data, patch_size, final_patch_size, batch_size, oversample_foreground_percent=0.0,
                 memmap_mode="r", pseudo_3d_slices=1, pad_mode="edge",
                 pad_kwargs_data=None, pad_sides=None):
        """
        This is the basic data loader for 2D networks. It uses preprocessed data as produced by my (Fabian) preprocessing.
        You can load the data with load_dataset(folder) where folder is the folder where the npz files are located. If there
        are only npz files present in that folder, the data loader will unpack them on the fly. This may take a while
        and increase CPU usage. Therefore, I advise you to call unpack_dataset(folder) first, which will unpack all npz
        to npy. Don't forget to call delete_npy(folder) after you are done with training?
        Why all the hassle? Well the decathlon dataset is huge. Using npy for everything will consume >1 TB and that is uncool
        given that I (Fabian) will have to store that permanently on /datasets and my local computer. With this strategy all
        data is stored in a compressed format (factor 10 smaller) and only unpacked when needed.
        :param data: get this with load_dataset(folder, stage=0). Plug the return value in here and you are g2g (good to go)
        :param patch_size: what patch size will this data loader return? it is common practice to first load larger
        patches so that a central crop after data augmentation can be done to reduce border artifacts. If unsure, use
        get_patch_size() from data_augmentation.default_data_augmentation
        :param final_patch_size: what will the patch finally be cropped to (after data augmentation)? this is the patch
        size that goes into your network. We need this here because we will pad patients in here so that patches at the
        border of patients are sampled properly
        :param batch_size:
        :param oversample_foreground_percent: fraction of each batch (taken from the END of the batch) that is
        forced to contain at least some foreground (equal prob for each of the foreground classes)
        :param memmap_mode: mmap_mode passed to np.load for unpacked npy files
        :param pseudo_3d_slices: 7 = 3 below and 3 above the center slice; neighbors are stacked as extra channels
        :param pad_mode: np.pad mode used for the image data (seg is always padded with -1)
        :param pad_kwargs_data: extra kwargs for np.pad of the image data
        :param pad_sides: optional extra padding added per axis on top of patch_size - final_patch_size
        """
        super(DataLoader2D, self).__init__(data, batch_size, None)
        if pad_kwargs_data is None:
            pad_kwargs_data = OrderedDict()
        self.pad_kwargs_data = pad_kwargs_data
        self.pad_mode = pad_mode
        self.pseudo_3d_slices = pseudo_3d_slices
        self.oversample_foreground_percent = oversample_foreground_percent
        self.final_patch_size = final_patch_size
        self.patch_size = patch_size
        self.list_of_keys = list(self._data.keys())
        # extra room around the final patch so augmentation can crop back
        # to final_patch_size without border artifacts
        self.need_to_pad = np.array(patch_size) - np.array(final_patch_size)
        self.memmap_mode = memmap_mode
        if pad_sides is not None:
            if not isinstance(pad_sides, np.ndarray):
                pad_sides = np.array(pad_sides)
            self.need_to_pad += pad_sides
        self.pad_sides = pad_sides
        self.data_shape, self.seg_shape = self.determine_shapes()

    def determine_shapes(self):
        """Peek at one case to derive the (batch, channel, x, y) shapes of the
        data and seg arrays returned by generate_train_batch."""
        # NOTE(review): this assumes exactly one image modality per case;
        # everything beyond the first channel is treated as seg — confirm
        # against the preprocessing that produced these npz files
        num_color_channels = 1

        k = list(self._data.keys())[0]
        # prefer the unpacked npy (memmapped) over decompressing the npz
        if isfile(self._data[k]['data_file'][:-4] + ".npy"):
            case_all_data = np.load(self._data[k]['data_file'][:-4] + ".npy", self.memmap_mode)
        else:
            case_all_data = np.load(self._data[k]['data_file'])['data']

        num_seg = case_all_data.shape[0] - num_color_channels
        data_shape = (self.batch_size, num_color_channels, *self.patch_size)
        seg_shape = (self.batch_size, num_seg, *self.patch_size)
        return data_shape, seg_shape

    def get_do_oversample(self, batch_idx):
        # the LAST ceil(batch_size * oversample_foreground_percent) samples of
        # each batch are the ones forced to contain foreground
        return not batch_idx < round(self.batch_size * (1 - self.oversample_foreground_percent))

    def generate_train_batch(self):
        """Sample batch_size random cases, pick one slice from each (biased
        towards foreground slices for oversampled batch positions) and cut a
        (possibly padded) 2D patch. Returns dict with 'data', 'seg',
        'properties', 'keys'."""
        selected_keys = np.random.choice(self.list_of_keys, self.batch_size, True, None)

        data = np.zeros(self.data_shape, dtype=np.float32)
        seg = np.zeros(self.seg_shape, dtype=np.float32)

        case_properties = []
        for j, i in enumerate(selected_keys):
            # properties may already be in memory (small datasets) or must be
            # loaded lazily from the pickle
            if 'properties' in self._data[i].keys():
                properties = self._data[i]['properties']
            else:
                properties = load_pickle(self._data[i]['properties_file'])
            case_properties.append(properties)

            # samples at the end of the batch are forced to contain foreground
            if self.get_do_oversample(j):
                force_fg = True
            else:
                force_fg = False

            # prefer the unpacked, memmapped npy; fall back to decompressing npz
            if not isfile(self._data[i]['data_file'][:-4] + ".npy"):
                case_all_data = np.load(self._data[i]['data_file'][:-4] + ".npz")['data']
            else:
                case_all_data = np.load(self._data[i]['data_file'][:-4] + ".npy", self.memmap_mode)

            # promote 3D (c, x, y) to 4D (c, z=1, x, y) so slicing is uniform
            if len(case_all_data.shape) == 3:
                case_all_data = case_all_data[:, None]

            if not force_fg:
                # unconstrained: any slice of the volume
                random_slice = np.random.choice(case_all_data.shape[1])
                selected_class = None
            else:
                # foreground oversampling needs precomputed voxel locations
                if 'class_locations' not in properties.keys():
                    raise RuntimeError("Please rerun the preprocessing with the newest version of nnU-Net!")

                # class_locations here is nested: label -> class -> voxel list.
                # Collect, per label, the classes (>0) that have voxels.
                foreground_classeses = []
                for label in properties['class_locations'].keys():
                    foreground_classes = np.array(
                        [i for i in properties['class_locations'][label].keys() if len(properties['class_locations'][label][i]) != 0])
                    foreground_classes = foreground_classes[foreground_classes > 0]
                    foreground_classeses.append(foreground_classes)

                # NOTE(review): this check tests the number of labels, not
                # whether any label has foreground; with a non-empty
                # class_locations dict it can never trigger, and the
                # np.random.choice below can fail on a label whose class
                # array is empty — confirm against the preprocessing output
                if len(foreground_classeses) == 0:
                    selected_class = None
                    random_slice = np.random.choice(case_all_data.shape[1])
                    print('case does not contain any foreground classes', i)
                else:
                    selected_label = np.random.choice(len(properties['class_locations']))
                    selected_class = np.random.choice(foreground_classeses[selected_label])

                    # restrict to one slice that contains the chosen class,
                    # then keep only the in-plane (x, y) coordinates
                    voxels_of_that_class = properties['class_locations'][selected_label][selected_class]
                    valid_slices = np.unique(voxels_of_that_class[:, 0])
                    random_slice = np.random.choice(valid_slices)
                    voxels_of_that_class = voxels_of_that_class[voxels_of_that_class[:, 0] == random_slice]
                    voxels_of_that_class = voxels_of_that_class[:, 1:]

            if self.pseudo_3d_slices == 1:
                case_all_data = case_all_data[:, random_slice]
            else:
                # pseudo-3D: stack neighboring slices as extra image channels;
                # the seg is taken from the center slice only. Out-of-volume
                # neighbors are zero-filled.
                mn = random_slice - (self.pseudo_3d_slices - 1) // 2
                mx = random_slice + (self.pseudo_3d_slices - 1) // 2 + 1
                valid_mn = max(mn, 0)
                valid_mx = min(mx, case_all_data.shape[1])
                case_all_seg = case_all_data[-1:]
                case_all_data = case_all_data[:-1]
                case_all_data = case_all_data[:, valid_mn:valid_mx]
                case_all_seg = case_all_seg[:, random_slice]
                need_to_pad_below = valid_mn - mn
                need_to_pad_above = mx - valid_mx
                if need_to_pad_below > 0:
                    shp_for_pad = np.array(case_all_data.shape)
                    shp_for_pad[1] = need_to_pad_below
                    case_all_data = np.concatenate((np.zeros(shp_for_pad), case_all_data), 1)
                if need_to_pad_above > 0:
                    shp_for_pad = np.array(case_all_data.shape)
                    shp_for_pad[1] = need_to_pad_above
                    case_all_data = np.concatenate((case_all_data, np.zeros(shp_for_pad)), 1)
                # fold (c, slices, x, y) into (c * slices, x, y) channels
                case_all_data = case_all_data.reshape((-1, case_all_data.shape[-2], case_all_data.shape[-1]))
                case_all_data = np.concatenate((case_all_data, case_all_seg), 0)

            # from here on case_all_data is (channels + seg, x, y)
            assert len(case_all_data.shape) == 3

            # If the slice is smaller than patch_size along an axis, pad enough
            # so that at least one full patch fits.
            need_to_pad = self.need_to_pad.copy()
            for d in range(2):
                if need_to_pad[d] + case_all_data.shape[d + 1] < self.patch_size[d]:
                    need_to_pad[d] = self.patch_size[d] - case_all_data.shape[d + 1]

            # Valid range for the patch's lower-bound corner per axis; bounds
            # may lie outside the image, realized later via np.pad.
            shape = case_all_data.shape[1:]
            lb_x = - need_to_pad[0] // 2
            ub_x = shape[0] + need_to_pad[0] // 2 + need_to_pad[0] % 2 - self.patch_size[0]
            lb_y = - need_to_pad[1] // 2
            ub_y = shape[1] + need_to_pad[1] // 2 + need_to_pad[1] % 2 - self.patch_size[1]

            if not force_fg or selected_class is None:
                # unconstrained random patch position
                bbox_x_lb = np.random.randint(lb_x, ub_x + 1)
                bbox_y_lb = np.random.randint(lb_y, ub_y + 1)
            else:
                # center the patch on a random voxel of the selected class,
                # clamped to the valid lower-bound range
                selected_voxel = voxels_of_that_class[np.random.choice(len(voxels_of_that_class))]
                bbox_x_lb = max(lb_x, selected_voxel[0] - self.patch_size[0] // 2)
                bbox_y_lb = max(lb_y, selected_voxel[1] - self.patch_size[1] // 2)

            bbox_x_ub = bbox_x_lb + self.patch_size[0]
            bbox_y_ub = bbox_y_lb + self.patch_size[1]

            # intersect the bbox with the image; the out-of-image remainder is
            # re-added below with np.pad
            valid_bbox_x_lb = max(0, bbox_x_lb)
            valid_bbox_x_ub = min(shape[0], bbox_x_ub)
            valid_bbox_y_lb = max(0, bbox_y_lb)
            valid_bbox_y_ub = min(shape[1], bbox_y_ub)

            case_all_data = case_all_data[:, valid_bbox_x_lb:valid_bbox_x_ub,
                            valid_bbox_y_lb:valid_bbox_y_ub]

            # image channel(s): pad with pad_mode (default 'edge')
            case_all_data_donly = np.pad(case_all_data[:1], ((0, 0),
                                                             (-min(0, bbox_x_lb), max(bbox_x_ub - shape[0], 0)),
                                                             (-min(0, bbox_y_lb), max(bbox_y_ub - shape[1], 0))),
                                         self.pad_mode, **self.pad_kwargs_data)

            # seg channel(s): padded with -1 (= "outside patient")
            case_all_data_segonly = np.pad(case_all_data[1:], ((0, 0),
                                                               (-min(0, bbox_x_lb), max(bbox_x_ub - shape[0], 0)),
                                                               (-min(0, bbox_y_lb), max(bbox_y_ub - shape[1], 0))),
                                           'constant', **{'constant_values': -1})

            data[j] = case_all_data_donly
            seg[j] = case_all_data_segonly

        keys = selected_keys
        return {'data': data, 'seg': seg, 'properties': case_properties, "keys": keys}
|
|
|
|
|
if __name__ == "__main__":
    # smoke test: load the Heart task, unpack it and instantiate the loaders
    task = "Task002_Heart"
    stage_folder = join(preprocessing_output_dir, task, "stage1")
    dataset = load_dataset(stage_folder)
    with open(join(preprocessing_output_dir, task, "plans_stage1.pkl"), 'rb') as f:
        plans = pickle.load(f)
    unpack_dataset(stage_folder)
    dl = DataLoader3D(dataset, (32, 32, 32), (32, 32, 32), 2, oversample_foreground_percent=0.33)
    dl = DataLoader3D(dataset, np.array(plans['patch_size']).astype(int), np.array(plans['patch_size']).astype(int), 2,
                      oversample_foreground_percent=0.33)
    dl2d = DataLoader2D(dataset, (64, 64), np.array(plans['patch_size']).astype(int)[1:], 12,
                        oversample_foreground_percent=0.33)
|
|