# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import shutil
from multiprocessing.pool import Pool

import numpy as np
from batchgenerators.utilities.file_and_folder_operations import *

from nnunet.configuration import default_num_threads
from nnunet.evaluation.evaluator import aggregate_scores
from nnunet.inference.segmentation_export import save_segmentation_nifti_from_softmax
from nnunet.paths import network_training_output_dir, preprocessing_output_dir
from nnunet.postprocessing.connected_components import determine_postprocessing


def merge(args):
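    """
    Averages the softmax predictions stored in two .npz files and exports the mean as a nifti segmentation.

    `args` is a single (file1, file2, properties_file, out_file) tuple so that this function can be used with
    Pool.map. Cases whose output file already exists are skipped, which makes interrupted runs resumable.
    """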
    file1, file2, properties_file, out_file = args
    if not isfile(out_file):
        res1 = np.load(file1)['softmax']
        res2 = np.load(file2)['softmax']
        props = load_pickle(properties_file)
        mn = np.mean((res1, res2), 0)
        # Softmax probabilities are already at target spacing so this will not do any resampling
        # (resampling parameters don't matter here)
        save_segmentation_nifti_from_softmax(mn, out_file, props, 3, None, None, None, force_separate_z=None,
                                             interpolation_order_z=0)


def ensemble(training_output_folder1, training_output_folder2, output_folder, task, validation_folder, folds,
             allow_ensembling: bool = True):
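    """
    Ensembles the cross-validation predictions of two trained models by averaging their softmax outputs,
    evaluates the ensembled segmentations against the ground truth and, if allow_ensembling is True,
    also determines a postprocessing strategy for the ensemble.
    """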
print("\nEnsembling folders\n", training_output_folder1, "\n", training_output_folder2)
output_folder_base = output_folder
output_folder = join(output_folder_base, "ensembled_raw")
# only_keep_largest_connected_component is the same for all stages
dataset_directory = join(preprocessing_output_dir, task)
plans = load_pickle(join(training_output_folder1, "plans.pkl")) # we need this only for the labels
files1 = []
files2 = []
property_files = []
out_files = []
gt_segmentations = []
folder_with_gt_segs = join(dataset_directory, "gt_segmentations")
    # we use the .npz files for ensembling because the softmax predictions stored in them are already
    # in the correct shape; the .pkl property files provide the original geometry needed to restore the niftis
    for f in folds:
        validation_folder_net1 = join(training_output_folder1, "fold_%d" % f, validation_folder)
        validation_folder_net2 = join(training_output_folder2, "fold_%d" % f, validation_folder)

        if not isdir(validation_folder_net1):
            raise AssertionError("Validation directory missing: %s. Please rerun validation with "
                                 "`nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`" % validation_folder_net1)
        if not isdir(validation_folder_net2):
            raise AssertionError("Validation directory missing: %s. Please rerun validation with "
                                 "`nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`" % validation_folder_net2)
        # we need to ensure the validation was successful. We can verify this via the presence of the summary.json
        if not isfile(join(validation_folder_net1, 'summary.json')):
            raise AssertionError("Validation directory incomplete: %s. Please rerun validation with "
                                 "`nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`" % validation_folder_net1)
        if not isfile(join(validation_folder_net2, 'summary.json')):
            raise AssertionError("Validation directory incomplete: %s. Please rerun validation with "
                                 "`nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`" % validation_folder_net2)
        patient_identifiers1_npz = [i[:-4] for i in subfiles(validation_folder_net1, False, None, 'npz', True)]
        patient_identifiers2_npz = [i[:-4] for i in subfiles(validation_folder_net2, False, None, 'npz', True)]

        # validation no longer exports postprocessed variants, but old noPostProcess / _postprocessed files may
        # still be lying around, so filter them out defensively
        patient_identifiers1_nii = [i[:-7] for i in
                                    subfiles(validation_folder_net1, False, None, suffix='nii.gz', sort=True)
                                    if not i.endswith("noPostProcess.nii.gz")
                                    and not i.endswith('_postprocessed.nii.gz')]
        patient_identifiers2_nii = [i[:-7] for i in
                                    subfiles(validation_folder_net2, False, None, suffix='nii.gz', sort=True)
                                    if not i.endswith("noPostProcess.nii.gz")
                                    and not i.endswith('_postprocessed.nii.gz')]

        if not all([i in patient_identifiers1_npz for i in patient_identifiers1_nii]):
            raise AssertionError("Missing npz files in folder %s. Please run the validation for all models and "
                                 "folds with the '--npz' flag." % validation_folder_net1)
        if not all([i in patient_identifiers2_npz for i in patient_identifiers2_nii]):
            raise AssertionError("Missing npz files in folder %s. Please run the validation for all models and "
                                 "folds with the '--npz' flag." % validation_folder_net2)

        patient_identifiers1_npz.sort()
        patient_identifiers2_npz.sort()

        assert all([i == j for i, j in zip(patient_identifiers1_npz, patient_identifiers2_npz)]), \
            "npz filenames do not match. This should not happen."

        maybe_mkdir_p(output_folder)

        for p in patient_identifiers1_npz:
            files1.append(join(validation_folder_net1, p + '.npz'))
            files2.append(join(validation_folder_net2, p + '.npz'))
            property_files.append(join(validation_folder_net1, p) + ".pkl")
            out_files.append(join(output_folder, p + ".nii.gz"))
            gt_segmentations.append(join(folder_with_gt_segs, p + ".nii.gz"))
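
    # average the softmax predictions of all selected cases in parallel and export them as niftis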
    p = Pool(default_num_threads)
    p.map(merge, zip(files1, files2, property_files, out_files))
    p.close()
    p.join()
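
    # evaluate the raw (not yet postprocessed) ensemble against the ground truth segmentations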
    if not isfile(join(output_folder, "summary.json")) and len(out_files) > 0:
        aggregate_scores(tuple(zip(out_files, gt_segmentations)), labels=plans['all_classes'],
                         json_output_file=join(output_folder, "summary.json"), json_task=task,
                         json_name=task + "__" + output_folder_base.split("/")[-1], num_threads=default_num_threads)

    if allow_ensembling and not isfile(join(output_folder_base, "postprocessing.json")):
        # now let's also look at postprocessing. We cannot simply reuse the postprocessing determined for the
        # individual networks during cross-validation because the ensemble may behave differently and the two
        # networks may be inconsistent with each other
        determine_postprocessing(output_folder_base, folder_with_gt_segs, "ensembled_raw", "temp",
                                 "ensembled_postprocessed", default_num_threads, dice_threshold=0)
        out_dir_all_json = join(network_training_output_dir, "summary_jsons")
        json_out = load_json(join(output_folder_base, "ensembled_postprocessed", "summary.json"))

        json_out["experiment_name"] = output_folder_base.split("/")[-1]
        save_json(json_out, join(output_folder_base, "ensembled_postprocessed", "summary.json"))

        maybe_mkdir_p(out_dir_all_json)
        shutil.copy(join(output_folder_base, "ensembled_postprocessed", "summary.json"),
                    join(out_dir_all_json, "%s__%s.json" % (task, output_folder_base.split("/")[-1])))
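
# Example usage (illustrative sketch only; the folder layout below is hypothetical - in practice these paths
# are assembled by nnUNet_find_best_configuration, which calls ensemble() for every pair of configurations):
#
#     ensemble(join(network_training_output_dir, "3d_fullres", "Task004_Hippocampus", "nnUNetTrainerV2__nnUNetPlansv2.1"),
#              join(network_training_output_dir, "2d", "Task004_Hippocampus", "nnUNetTrainerV2__nnUNetPlansv2.1"),
#              join(network_training_output_dir, "ensembles", "Task004_Hippocampus", "ensemble_2d__3d_fullres"),
#              "Task004_Hippocampus", "validation_raw", range(5))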