import json
import os
import pickle
import shutil
from collections import OrderedDict
from multiprocessing import Pool

import numpy as np
from batchgenerators.utilities.file_and_folder_operations import join, isdir, maybe_mkdir_p, subfiles, subdirs, isfile

from nnunet.configuration import default_num_threads
from nnunet.experiment_planning.DatasetAnalyzer import DatasetAnalyzer
from nnunet.experiment_planning.common_utils import split_4d_nifti
from nnunet.paths import nnUNet_raw_data, nnUNet_cropped_data, preprocessing_output_dir
from nnunet.preprocessing.cropping import ImageCropper


def split_4d(input_folder, num_processes=default_num_threads, overwrite_task_output_id=None):
    assert isdir(join(input_folder, "imagesTr")) and isdir(join(input_folder, "labelsTr")) and \
           isfile(join(input_folder, "dataset.json")), \
        "The input folder must be a valid Task folder from the Medical Segmentation Decathlon with at least the " \
        "imagesTr and labelsTr subfolders and the dataset.json file"

    # remove trailing slashes so the task name can be read off the path
    while input_folder.endswith("/"):
        input_folder = input_folder[:-1]

    full_task_name = input_folder.split("/")[-1]

    assert full_task_name.startswith("Task"), "The input folder must point to a folder whose name starts with TaskXX_"

    first_underscore = full_task_name.find("_")
    assert first_underscore == 6, "The input folder name must start with TaskXX_ where XX is a 2-digit task id: " \
                                  "01, 02, etc."

    input_task_id = int(full_task_name[4:6])
    if overwrite_task_output_id is None:
        overwrite_task_output_id = input_task_id

    task_name = full_task_name[7:]

    # nnU-Net output tasks use 3-digit ids (Task002_..., Task048_..., etc.)
    output_folder = join(nnUNet_raw_data, "Task%03.0d_" % overwrite_task_output_id + task_name)

    if isdir(output_folder):
        shutil.rmtree(output_folder)

    files = []
    output_dirs = []

    maybe_mkdir_p(output_folder)
    for subdir in ["imagesTr", "imagesTs"]:
        curr_out_dir = join(output_folder, subdir)
        if not isdir(curr_out_dir):
            os.mkdir(curr_out_dir)
        curr_dir = join(input_folder, subdir)
        nii_files = [join(curr_dir, i) for i in os.listdir(curr_dir) if i.endswith(".nii.gz")]
        nii_files.sort()
        for n in nii_files:
            files.append(n)
            output_dirs.append(curr_out_dir)

    shutil.copytree(join(input_folder, "labelsTr"), join(output_folder, "labelsTr"))

    # split each 4D nifti into one 3D nifti per modality, in parallel
    p = Pool(num_processes)
    p.starmap(split_4d_nifti, zip(files, output_dirs))
    p.close()
    p.join()
    shutil.copy(join(input_folder, "dataset.json"), output_folder)
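# Usage sketch (hypothetical paths): converting a downloaded Decathlon task into the
# nnU-Net raw data layout, where each modality becomes its own 3D file with a
# 4-digit suffix (e.g. imagesTr/<case>_0000.nii.gz):
#
#   split_4d("/data/MSD/Task02_Heart", num_processes=8)
#
# This would create Task002_Heart inside nnUNet_raw_data.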
|
|
|
|
|
def create_lists_from_splitted_dataset(base_folder_splitted):
    lists = []

    json_file = join(base_folder_splitted, "dataset.json")
    with open(json_file) as jsn:
        d = json.load(jsn)
        training_files = d['training']
    num_modalities = len(d['modality'].keys())
    for tr in training_files:
        cur_pat = []
        for mod in range(num_modalities):
            # tr['image'] looks like "./imagesTr/<case>.nii.gz"; drop the 7-character
            # ".nii.gz" suffix and append the 4-digit modality index
            cur_pat.append(join(base_folder_splitted, "imagesTr", tr['image'].split("/")[-1][:-7] +
                                "_%04.0d.nii.gz" % mod))
        cur_pat.append(join(base_folder_splitted, "labelsTr", tr['label'].split("/")[-1]))
        lists.append(cur_pat)
    return lists, {int(i): d['modality'][str(i)] for i in d['modality'].keys()}
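# Return value sketch for a hypothetical two-modality task: one list per training case,
# modality files first and the label file last, plus the modality mapping from dataset.json:
#
#   lists[0] == [".../imagesTr/case_0000.nii.gz", ".../imagesTr/case_0001.nii.gz",
#                ".../labelsTr/case.nii.gz"]
#   modalities == {0: "CT", 1: "MRI"}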
|
|
|
|
|
def create_lists_from_splitted_dataset_folder(folder):
    """
    does not rely on dataset.json
    :param folder: folder containing the split _XXXX.nii.gz files of all cases
    :return: one sorted list of modality files per case
    """
    caseIDs = get_caseIDs_from_splitted_dataset_folder(folder)
    list_of_lists = []
    for f in caseIDs:
        list_of_lists.append(subfiles(folder, prefix=f, suffix=".nii.gz", join=True, sort=True))
    return list_of_lists
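# Sketch with hypothetical file names: a folder holding case1_0000.nii.gz,
# case1_0001.nii.gz and case2_0000.nii.gz would yield
#   [[".../case1_0000.nii.gz", ".../case1_0001.nii.gz"], [".../case2_0000.nii.gz"]]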
|
|
|
|
|
def get_caseIDs_from_splitted_dataset_folder(folder):
    files = subfiles(folder, suffix=".nii.gz", join=False)

    # strip the 12-character "_XXXX.nii.gz" suffix to recover the case identifier
    files = [i[:-12] for i in files]

    files = np.unique(files)
    return files
|
|
|
|
|
def crop(task_string, override=False, num_threads=default_num_threads):
    cropped_out_dir = join(nnUNet_cropped_data, task_string)
    maybe_mkdir_p(cropped_out_dir)

    if override and isdir(cropped_out_dir):
        shutil.rmtree(cropped_out_dir)
        maybe_mkdir_p(cropped_out_dir)

    splitted_4d_output_dir_task = join(nnUNet_raw_data, task_string)
    lists, _ = create_lists_from_splitted_dataset(splitted_4d_output_dir_task)

    imgcrop = ImageCropper(num_threads, cropped_out_dir)
    imgcrop.run_cropping(lists, overwrite_existing=override)
    shutil.copy(join(nnUNet_raw_data, task_string, "dataset.json"), cropped_out_dir)
|
|
|
|
|
def analyze_dataset(task_string, override=False, collect_intensityproperties=True, num_processes=default_num_threads):
    cropped_out_dir = join(nnUNet_cropped_data, task_string)
    dataset_analyzer = DatasetAnalyzer(cropped_out_dir, overwrite=override, num_processes=num_processes)
    _ = dataset_analyzer.analyze_dataset(collect_intensityproperties)
|
|
|
|
|
def plan_and_preprocess(task_string, processes_lowres=default_num_threads, processes_fullres=3, no_preprocessing=False):
    from nnunet.experiment_planning.experiment_planner_baseline_2DUNet import ExperimentPlanner2D
    from nnunet.experiment_planning.experiment_planner_baseline_3DUNet import ExperimentPlanner

    preprocessing_output_dir_this_task_train = join(preprocessing_output_dir, task_string)
    cropped_out_dir = join(nnUNet_cropped_data, task_string)
    maybe_mkdir_p(preprocessing_output_dir_this_task_train)

    shutil.copy(join(cropped_out_dir, "dataset_properties.pkl"), preprocessing_output_dir_this_task_train)
    shutil.copy(join(nnUNet_raw_data, task_string, "dataset.json"), preprocessing_output_dir_this_task_train)

    # plan (and optionally run) the 3D preprocessing
    exp_planner = ExperimentPlanner(cropped_out_dir, preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing((processes_lowres, processes_fullres))

    # plan (and optionally run) the 2D preprocessing
    exp_planner = ExperimentPlanner2D(cropped_out_dir, preprocessing_output_dir_this_task_train)
    exp_planner.plan_experiment()
    if not no_preprocessing:
        exp_planner.run_preprocessing(processes_fullres)

    if not no_preprocessing:
        p = Pool(default_num_threads)

        # for every resolution stage, store which slices contain which classes in each
        # case's pkl file so the 2D dataloader can oversample foreground cheaply
        stages = [i for i in subdirs(preprocessing_output_dir_this_task_train, join=True, sort=True)
                  if i.split("/")[-1].find("stage") != -1]
        for s in stages:
            print(s.split("/")[-1])
            list_of_npz_files = subfiles(s, True, None, ".npz", True)
            list_of_pkl_files = [i[:-4] + ".pkl" for i in list_of_npz_files]
            all_classes = []
            for pk in list_of_pkl_files:
                with open(pk, 'rb') as f:
                    props = pickle.load(f)
                all_classes_tmp = np.array(props['classes'])
                all_classes.append(all_classes_tmp[all_classes_tmp >= 0])
            p.map(add_classes_in_slice_info, zip(list_of_npz_files, list_of_pkl_files, all_classes))
        p.close()
        p.join()
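# Pipeline sketch (hypothetical task): these helpers are typically called in order:
#
#   split_4d("/data/MSD/Task02_Heart")     # -> nnUNet_raw_data/Task002_Heart
#   crop("Task002_Heart")                  # -> nnUNet_cropped_data/Task002_Heart
#   analyze_dataset("Task002_Heart")       # dataset fingerprint / intensity statistics
#   plan_and_preprocess("Task002_Heart")   # plans + preprocessed data per resolution stage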
|
|
|
|
|
def add_classes_in_slice_info(args):
    """
    We need this for the 2D dataloader with oversampling. As of now the dataloader would have to detect, at run time,
    which slices contain specific classes, iterating over an entire patient just to extract one slice. That is
    obviously bad, so we do this once beforehand and give the dataloader the info it needs in the patient's pkl file.
    """
    npz_file, pkl_file, all_classes = args
    seg_map = np.load(npz_file)['data'][-1]  # the segmentation is the last channel of the preprocessed array
    with open(pkl_file, 'rb') as f:
        props = pickle.load(f)

    print(pkl_file)

    # for each axis, record the indices of all slices that contain each class
    classes_in_slice = OrderedDict()
    for axis in range(3):
        other_axes = tuple([i for i in range(3) if i != axis])
        classes_in_slice[axis] = OrderedDict()
        for c in all_classes:
            valid_slices = np.where(np.sum(seg_map == c, axis=other_axes) > 0)[0]
            classes_in_slice[axis][c] = valid_slices

    number_of_voxels_per_class = OrderedDict()
    for c in all_classes:
        number_of_voxels_per_class[c] = np.sum(seg_map == c)

    props['classes_in_slice_per_axis'] = classes_in_slice
    props['number_of_voxels_per_class'] = number_of_voxels_per_class

    with open(pkl_file, 'wb') as f:
        pickle.dump(props, f)
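# Sketch of the entries this adds to a case's pkl (hypothetical case with classes 1 and 2):
#   props['classes_in_slice_per_axis'][0][1] -> indices along axis 0 of slices containing class 1
#   props['number_of_voxels_per_class'][2]   -> total number of voxels labeled 2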
|
|