ChrisXiao
/

AutoSeg4ETICA

Image Segmentation

Model card Files Files and versions Community

AutoSeg4ETICA / nnunet /dataset_conversion /utils.py

Chris Xiao

upload files

c642393 5 months ago

history blame contribute delete

3.64 kB


	# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.


	from typing import Tuple
	import numpy as np
	from batchgenerators.utilities.file_and_folder_operations import *


	def get_identifiers_from_splitted_files(folder: str):
	    """
	    Derive the unique case identifiers from the ``.nii.gz`` entries of ``folder``.

	    Each name returned by ``subfiles(folder, suffix='.nii.gz', join=False)`` is shortened by its last
	    12 characters, which is the length of a trailing modality suffix such as ``_0000.nii.gz``. Names that
	    differ only within those 12 characters therefore collapse into one identifier.

	    :param folder: directory handed to ``subfiles`` to look for ``.nii.gz`` entries
	    :return: ``np.unique`` of the shortened names (a sorted numpy array without duplicates)
	    """
	    # len('_0000.nii.gz') == 12. The cut is purely positional: names are not checked for that pattern.
	    suffix_length = 12
	    nifti_names = subfiles(folder, suffix='.nii.gz', join=False)
	    case_ids = [name[:-suffix_length] for name in nifti_names]
	    return np.unique(case_ids)


	def generate_dataset_json(output_file: str, imagesTr_dir: str, imagesTs_dir: str, modalities: Tuple,
	                          labels: dict, dataset_name: str, sort_keys=True, license: str = "hands off!",
	                          dataset_description: str = "", dataset_reference="", dataset_release='0.0'):
	    """
	    Build the dataset description dictionary and write it as JSON to ``output_file``.

	    Training and test cases are discovered by scanning ``imagesTr_dir`` / ``imagesTs_dir`` with
	    ``get_identifiers_from_splitted_files``. The entries written to the file always use the relative
	    paths ``./imagesTr/<id>.nii.gz``, ``./labelsTr/<id>.nii.gz`` and ``./imagesTs/<id>.nii.gz``.

	    :param output_file: This needs to be the full path to the dataset.json you intend to write, so
	        output_file='DATASET_PATH/dataset.json' where the folder DATASET_PATH points to is the one with the
	        imagesTr and labelsTr subfolders. Any other file name only triggers a printed warning.
	    :param imagesTr_dir: path to the imagesTr folder of that dataset
	    :param imagesTs_dir: path to the imagesTs folder of that dataset. Can be None, in which case the test
	        list is written empty
	    :param modalities: tuple of strings with modality names. Must be in the same order as the images (first
	        entry corresponds to _0000.nii.gz, etc). Example: ('T1', 'T2', 'FLAIR'). A bare string such as 'CT'
	        is accepted and treated as the single modality ('CT',)
	    :param labels: dict with int->str (key->value) mapping the label IDs to label names. Note that 0 is always
	        supposed to be background! Example: {0: 'background', 1: 'edema', 2: 'enhancing tumor'}
	    :param dataset_name: The name of the dataset. Can be anything you want
	    :param sort_keys: forwarded to ``save_json``; whether the keys in dataset.json are sorted
	    :param license: stored under the 'licence' key of the file
	    :param dataset_description: stored under the 'description' key of the file
	    :param dataset_reference: website of the dataset, if available
	    :param dataset_release: stored under the 'release' key of the file
	    :return: None, the result is the file written to ``output_file``
	    """
	    # ('CT') without a trailing comma is a plain str, not a tuple. Indexing it below would emit one bogus
	    # modality per character ({"0": "C", "1": "T"}), so a bare string is taken as exactly one modality.
	    if isinstance(modalities, str):
	        modalities = (modalities,)

	    train_identifiers = get_identifiers_from_splitted_files(imagesTr_dir)
	    if imagesTs_dir is None:
	        test_identifiers = []
	    else:
	        test_identifiers = get_identifiers_from_splitted_files(imagesTs_dir)

	    # Insertion order is kept identical to the historic layout because it is what ends up in the file when
	    # sort_keys is False. The key spelling 'licence' is part of the emitted format and must stay as is.
	    json_dict = {
	        'name': dataset_name,
	        'description': dataset_description,
	        'tensorImageSize': "4D",
	        'reference': dataset_reference,
	        'licence': license,
	        'release': dataset_release,
	        # Positional lookup (instead of iterating) so every indexable container that worked before,
	        # including int-keyed mappings, still produces the same entries.
	        'modality': {str(idx): modalities[idx] for idx in range(len(modalities))},
	        'labels': {str(label_id): label_name for label_id, label_name in labels.items()},
	        'numTraining': len(train_identifiers),
	        'numTest': len(test_identifiers),
	        'training': [
	            {'image': "./imagesTr/%s.nii.gz" % case, "label": "./labelsTr/%s.nii.gz" % case}
	            for case in train_identifiers
	        ],
	        'test': ["./imagesTs/%s.nii.gz" % case for case in test_identifiers],
	    }

	    if not output_file.endswith("dataset.json"):
	        print("WARNING: output file name is not dataset.json! This may be intentional or not. You decide. "
	              "Proceeding anyways...")
	    save_json(json_dict, output_file, sort_keys=sort_keys)