File size: 4,324 Bytes
ecf08bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#    Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
#
#    Licensed under the Apache License, Version 2.0 (the "License");
#    you may not use this file except in compliance with the License.
#    You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.


import numpy as np
import subprocess
from collections import OrderedDict
from nnunet.paths import nnUNet_raw_data
from batchgenerators.utilities.file_and_folder_operations import *
import shutil
from skimage import io
import SimpleITK as sitk
import shutil


if __name__ == "__main__":
    # download from here https://www.epfl.ch/labs/cvlab/data/data-em/

    base = "/media/fabian/My Book/datasets/EPFL_MITO_SEG"
    # the orientation of VerSe is all fing over the place. run fslreorient2std to correct that (hopefully!)
    # THIS CAN HAVE CONSEQUENCES FOR THE TEST SET SUBMISSION! CAREFUL!
    train_volume = io.imread(join(base, "training.tif"))
    train_labels = io.imread(join(base, "training_groundtruth.tif"))
    train_labels[train_labels == 255] = 1
    test_volume = io.imread(join(base, "testing.tif"))
    test_labels = io.imread(join(base, "testing_groundtruth.tif"))
    test_labels[test_labels == 255] = 1

    task_id = 59
    task_name = "EPFL_EM_MITO_SEG"

    foldername = "Task%03.0d_%s" % (task_id, task_name)

    out_base = join(nnUNet_raw_data, foldername)
    imagestr = join(out_base, "imagesTr")
    imagests = join(out_base, "imagesTs")
    labelstr = join(out_base, "labelsTr")
    labelste = join(out_base, "labelsTs")
    maybe_mkdir_p(imagestr)
    maybe_mkdir_p(imagests)
    maybe_mkdir_p(labelstr)
    maybe_mkdir_p(labelste)

    img_tr_itk = sitk.GetImageFromArray(train_volume.astype(np.float32))
    lab_tr_itk = sitk.GetImageFromArray(train_labels.astype(np.uint8))
    img_te_itk = sitk.GetImageFromArray(test_volume.astype(np.float32))
    lab_te_itk = sitk.GetImageFromArray(test_labels.astype(np.uint8))

    img_tr_itk.SetSpacing((5, 5, 5))
    lab_tr_itk.SetSpacing((5, 5, 5))
    img_te_itk.SetSpacing((5, 5, 5))
    lab_te_itk.SetSpacing((5, 5, 5))

    # 5 copies, otherwise we cannot run nnunet (5 fold cv needs that)
    sitk.WriteImage(img_tr_itk, join(imagestr, "training0_0000.nii.gz"))
    shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training1_0000.nii.gz"))
    shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training2_0000.nii.gz"))
    shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training3_0000.nii.gz"))
    shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training4_0000.nii.gz"))

    sitk.WriteImage(lab_tr_itk, join(labelstr, "training0.nii.gz"))
    shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training1.nii.gz"))
    shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training2.nii.gz"))
    shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training3.nii.gz"))
    shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training4.nii.gz"))

    sitk.WriteImage(img_te_itk, join(imagests, "testing.nii.gz"))
    sitk.WriteImage(lab_te_itk, join(labelste, "testing.nii.gz"))

    json_dict = OrderedDict()
    json_dict['name'] = task_name
    json_dict['description'] = task_name
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "see challenge website"
    json_dict['licence'] = "see challenge website"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "EM",
    }
    json_dict['labels'] = {i: str(i) for i in range(2)}

    json_dict['numTraining'] = 5
    json_dict['numTest'] = 1
    json_dict['training'] = [{'image': "./imagesTr/training%d.nii.gz" % i, "label": "./labelsTr/training%d.nii.gz" % i} for i in
                             range(5)]
    json_dict['test'] = ["./imagesTs/testing.nii.gz"]

    save_json(json_dict, os.path.join(out_base, "dataset.json"))