# File: nnUNet_calvingfront_detection/nnunet/dataset_conversion/Task027_AutomaticCardiacDetectionChallenge.py
# Hosting-page residue (not part of the program): ho11laqe's picture | init | ecf08bc | raw history blame | No virus | 4.75 kB
# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from batchgenerators.utilities.file_and_folder_operations import *
import shutil
import numpy as np
from sklearn.model_selection import KFold
def convert_to_submission(source_dir, target_dir):
    """Copy per-patient ``.nii.gz`` files into ``target_dir`` under ED/ES names.

    The ``.nii.gz`` files found in ``source_dir`` are grouped by the first 10
    characters of their file name, which serve as the patient id (presumably
    of the form ``patientXXX`` -- confirm against the actual data). For every
    patient the matching files are sorted by name; the first one is copied to
    ``<patient id>_ED.nii.gz`` and the second one to ``<patient id>_ES.nii.gz``
    inside ``target_dir``. Any further files of that patient are ignored.

    Args:
        source_dir: folder containing the ``.nii.gz`` files to convert.
        target_dir: destination folder; ``maybe_mkdir_p`` is called on it
            before anything is copied.

    Raises:
        ValueError: if fewer than two files are found for a patient. The check
            runs before any file of that patient is copied, so no lone ED file
            is left behind for an incomplete patient.
    """
    niftis = subfiles(source_dir, join=False, suffix=".nii.gz")
    patientids = np.unique([i[:10] for i in niftis])
    maybe_mkdir_p(target_dir)
    for p in patientids:
        files_of_that_patient = sorted(
            subfiles(source_dir, prefix=p, suffix=".nii.gz", join=False)
        )
        # Both an ED and an ES file are required; a bare truthiness check would
        # let a single-file patient through and fail later on index 1.
        if len(files_of_that_patient) < 2:
            raise ValueError(
                "Expected at least two .nii.gz files (ED and ES) for patient "
                "%s in %s, found %d: %s"
                % (p, source_dir, len(files_of_that_patient), files_of_that_patient)
            )
        # first is ED, second is ES
        ed_file, es_file = files_of_that_patient[:2]
        shutil.copy(join(source_dir, ed_file), join(target_dir, p + "_ED.nii.gz"))
        shutil.copy(join(source_dir, es_file), join(target_dir, p + "_ES.nii.gz"))
if __name__ == "__main__":
    # Converts the ACDC training/testing folders into the Task027_ACDC raw
    # dataset layout (imagesTr / labelsTr / imagesTs + dataset.json) and then
    # writes a patient-wise 5-fold split. All paths below are hard-coded.
    folder = "/media/fabian/My Book/datasets/ACDC/training"
    folder_test = "/media/fabian/My Book/datasets/ACDC/testing/testing"
    out_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task027_ACDC"

    maybe_mkdir_p(join(out_folder, "imagesTr"))
    maybe_mkdir_p(join(out_folder, "imagesTs"))
    maybe_mkdir_p(join(out_folder, "labelsTr"))

    # train
    all_train_files = []
    patient_dirs_train = subfolders(folder, prefix="patient")
    for p in patient_dirs_train:
        # Images are all .nii.gz files whose path contains neither "_gt" nor "_4d".
        data_files_train = [i for i in subfiles(p, suffix=".nii.gz")
                            if i.find("_gt") == -1 and i.find("_4d") == -1]
        # The segmentation of an image sits next to it with "_gt" inserted
        # before the ".nii.gz" extension (7 characters).
        corresponding_seg_files = [i[:-7] + "_gt.nii.gz" for i in data_files_train]
        for d, s in zip(data_files_train, corresponding_seg_files):
            # File name without directory and without ".nii.gz".
            patient_identifier = os.path.basename(d)[:-7]
            all_train_files.append(patient_identifier + "_0000.nii.gz")
            shutil.copy(d, join(out_folder, "imagesTr", patient_identifier + "_0000.nii.gz"))
            shutil.copy(s, join(out_folder, "labelsTr", patient_identifier + ".nii.gz"))

    # test
    all_test_files = []
    patient_dirs_test = subfolders(folder_test, prefix="patient")
    for p in patient_dirs_test:
        data_files_test = [i for i in subfiles(p, suffix=".nii.gz")
                           if i.find("_gt") == -1 and i.find("_4d") == -1]
        for d in data_files_test:
            patient_identifier = os.path.basename(d)[:-7]
            all_test_files.append(patient_identifier + "_0000.nii.gz")
            shutil.copy(d, join(out_folder, "imagesTs", patient_identifier + "_0000.nii.gz"))

    json_dict = OrderedDict()
    json_dict['name'] = "ACDC"
    json_dict['description'] = "cardias cine MRI segmentation"
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "see ACDC challenge"
    json_dict['licence'] = "see ACDC challenge"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "MRI",
    }
    json_dict['labels'] = {
        "0": "background",
        "1": "RV",
        "2": "MLV",
        "3": "LVC"
    }
    json_dict['numTraining'] = len(all_train_files)
    json_dict['numTest'] = len(all_test_files)
    # Entries of all_*_files are bare file names ending in "_0000.nii.gz"
    # (12 characters); dataset.json lists the cases without that suffix.
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i[:-12],
                              "label": "./labelsTr/%s.nii.gz" % i[:-12]}
                             for i in all_train_files]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i[:-12] for i in all_test_files]
    save_json(json_dict, os.path.join(out_folder, "dataset.json"))

    # create a dummy split (patients need to be separated)
    splits = []
    # The first 10 characters of a file name identify the patient; folds are
    # drawn over patients so that all files of a patient land in the same fold.
    patients = np.unique([i[:10] for i in all_train_files])
    # shuffle / random_state are keyword-only in current scikit-learn releases.
    kf = KFold(n_splits=5, shuffle=True, random_state=12345)
    for tr, val in kf.split(patients):
        tr_patients = patients[tr]
        val_patients = patients[val]
        split = OrderedDict()
        split['train'] = [i[:-12] for i in all_train_files if i[:10] in tr_patients]
        split['val'] = [i[:-12] for i in all_train_files if i[:10] in val_patients]
        splits.append(split)
    save_pickle(splits, "/media/fabian/nnunet/Task027_ACDC/splits_final.pkl")