ho11laqe committed
Commit: ecf08bc
Parent: 75ea7e6
This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. HIP_Logo.png +0 -0
  2. LICENSE +201 -0
  3. RUN_CALVINGFRONT_DETECTION.sh +26 -0
  4. create_plots_new/Nofront.py +77 -0
  5. create_plots_new/SegmentationMetrics.py +65 -0
  6. create_plots_new/area_change.py +92 -0
  7. create_plots_new/canny_edge.py +157 -0
  8. create_plots_new/compute_significance.py +94 -0
  9. create_plots_new/create_train_gif.py +158 -0
  10. create_plots_new/dataset_timeline.py +56 -0
  11. create_plots_new/front_change.py +228 -0
  12. data_processing/data_postprocessing.py +323 -0
  13. documentation/common_problems_and_solutions.md +104 -0
  14. documentation/common_questions.md +201 -0
  15. documentation/data_format_inference.md +34 -0
  16. documentation/dataset_conversion.md +213 -0
  17. documentation/expected_epoch_times.md +173 -0
  18. documentation/extending_nnunet.md +119 -0
  19. documentation/inference_example_Prostate.md +78 -0
  20. documentation/setting_up_paths.md +84 -0
  21. documentation/training_example_Hippocampus.md +40 -0
  22. documentation/tutorials/custom_preprocessing.md +60 -0
  23. documentation/tutorials/custom_spacing.md +33 -0
  24. documentation/tutorials/edit_plans_files.md +141 -0
  25. documentation/using_nnUNet_as_baseline.md +4 -0
  26. evaluate_nnUNet.py +656 -0
  27. nnunet/__init__.py +7 -0
  28. nnunet/configuration.py +5 -0
  29. nnunet/dataset_conversion/Task017_BeyondCranialVaultAbdominalOrganSegmentation.py +94 -0
  30. nnunet/dataset_conversion/Task024_Promise2012.py +81 -0
  31. nnunet/dataset_conversion/Task027_AutomaticCardiacDetectionChallenge.py +106 -0
  32. nnunet/dataset_conversion/Task029_LiverTumorSegmentationChallenge.py +123 -0
  33. nnunet/dataset_conversion/Task032_BraTS_2018.py +176 -0
  34. nnunet/dataset_conversion/Task035_ISBI_MSLesionSegmentationChallenge.py +162 -0
  35. nnunet/dataset_conversion/Task037_038_Chaos_Challenge.py +460 -0
  36. nnunet/dataset_conversion/Task040_KiTS.py +240 -0
  37. nnunet/dataset_conversion/Task043_BraTS_2019.py +164 -0
  38. nnunet/dataset_conversion/Task055_SegTHOR.py +98 -0
  39. nnunet/dataset_conversion/Task056_VerSe2019.py +274 -0
  40. nnunet/dataset_conversion/Task056_Verse_normalize_orientation.py +98 -0
  41. nnunet/dataset_conversion/Task058_ISBI_EM_SEG.py +105 -0
  42. nnunet/dataset_conversion/Task059_EPFL_EM_MITO_SEG.py +99 -0
  43. nnunet/dataset_conversion/Task061_CREMI.py +146 -0
  44. nnunet/dataset_conversion/Task062_NIHPancreas.py +89 -0
  45. nnunet/dataset_conversion/Task064_KiTS_labelsFixed.py +84 -0
  46. nnunet/dataset_conversion/Task065_KiTS_NicksLabels.py +87 -0
  47. nnunet/dataset_conversion/Task069_CovidSeg.py +68 -0
  48. nnunet/dataset_conversion/Task075_Fluo_C3DH_A549_ManAndSim.py +137 -0
  49. nnunet/dataset_conversion/Task076_Fluo_N3DH_SIM.py +312 -0
  50. nnunet/dataset_conversion/Task082_BraTS_2020.py +751 -0
HIP_Logo.png ADDED
LICENSE ADDED
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [2019] [Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
RUN_CALVINGFRONT_DETECTION.sh ADDED
@@ -0,0 +1,26 @@
+ #!/bin/bash -l
+
+ while getopts ":m:d:" opt; do
+   case $opt in
+     m) model="$OPTARG";;
+     d) data="$OPTARG";;
+     *) echo "Unknown option: -$OPTARG"
+        exit 1;;
+   esac
+ done
+ # Point to the folder with the SAR images
+ export data_raw=$data
+
+ # Folders for processing
+ export nnUNet_raw_data_base=$data_raw'/data_nnUNet_preprocessed/NIFTI/'
+ export nnUNet_preprocessed=$data_raw'/data_nnUNet_preprocessed/'
+ export RESULTS_FOLDER=$data_raw'/calvingfronts/'
+
+ # Convert & Preprocess
+ python3 nnunet/dataset_conversion/Task500_Glacier_inference.py -data_percentage 100 -base $data_raw
+
+ # Inference
+ python3 nnunet/inference/predict_simple.py -i $nnUNet_raw_data_base'nnUNet_raw_data/Task500_Glacier_zonefronts/imagesTs/' -o $RESULTS_FOLDER/fold_0 -t 500 -m 2d -f 0 -p nnUNetPlansv2.1 -tr nnUNetTrainerV2 -model_folder_name $model
+
+ # Convert model output to PNG/TIF
+ python3 nnunet/dataset_conversion/Task500_Glacier_reverse.py -i $RESULTS_FOLDER'fold_0/'
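For reference, the two flags parsed by getopts above are the only inputs the pipeline expects; a typical invocation would presumably look like `bash RUN_CALVINGFRONT_DETECTION.sh -m <model_folder_name> -d /path/to/sar_data`, where `<model_folder_name>` is the trained nnU-Net model directory forwarded to `predict_simple.py` and `/path/to/sar_data` is the folder containing the SAR images (both values are placeholders).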
create_plots_new/Nofront.py ADDED
@@ -0,0 +1,77 @@
+ import numpy as np
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import plotly.io as pio
+ import os
+ pio.kaleido.scope.mathjax = None
+ import json
+
+
+ if __name__ == '__main__':
+     experiments = ['Task501_Glacier_front',
+                    'Task502_Glacier_zone',
+                    'Task503_Glacier_mtl_early',
+                    'Task503_Glacier_mtl_late',
+                    'Task505_Glacier_mtl_boundary',
+                    'Task500_Glacier_zonefronts']
+     data_dir = '/home/ho11laqe/Desktop/nnUNet_results/Final_Eval/'
+
+     nofront = {}
+     nozone = {}
+     for experiment in experiments:
+         no_front_exp_front = []
+         no_front_exp_zone = []
+         # nofront[experiment] = {'Front': [], 'Zone': []}
+         for fold in range(5):
+             results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'pngs', 'eval_results.json')
+             if not os.path.exists(results_json_path):
+                 results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'eval_results.json')
+
+             with open(results_json_path, 'r') as f:
+                 result = json.load(f)
+             if 'Front_Delineation' in result.keys():
+                 # no_front_exp_front.append(result['Front_Delineation']['Result_all']['Number_no_front'])
+                 no_front_exp_front.append(result['Front_Delineation']['Result_all']['mean'])
+             else:
+                 no_front_exp_front.append(0)
+             if 'Zone_Delineation' in result.keys():
+                 no_front_exp_zone.append(result['Zone_Delineation']['Result_all']['mean'])
+             else:
+                 no_front_exp_zone.append(0)
+
+         # nofront[experiment]['Front'] = no_front_exp_front
+         # nofront[experiment]['Zone'] = no_front_exp_zone
+         nofront[experiment] = no_front_exp_front
+         nozone[experiment] = no_front_exp_zone
+
+     box_width = 0.8
+     fig = px.box(None, points="all", template="plotly_white", width=1200, height=500)
+
+     fig.add_trace(go.Box(y=nofront['Task501_Glacier_front'], name='Front<br>STL', width=box_width,
+                          marker_color='CadetBlue', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nofront['Task503_Glacier_mtl_early'], name='Early Front <br>MTL', width=box_width,
+                          marker_color='YellowGreen', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nofront['Task503_Glacier_mtl_late'], name='Late Front <br>MTL', width=box_width,
+                          marker_color='#e1e400', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nofront['Task505_Glacier_mtl_boundary'], name='Boundary<br> Front MTL', width=box_width,
+                          marker_color='gold', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nofront['Task500_Glacier_zonefronts'], name='Fused Labels <br> Front', width=box_width,
+                          marker_color='orange', pointpos=0, boxpoints='all', boxmean=True))
+
+     fig.add_trace(go.Box(y=nozone['Task502_Glacier_zone'], name='Zone<br> STL', width=box_width,
+                          marker_color='LightBlue', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nozone['Task503_Glacier_mtl_early'], name='Early Zone <br>MTL', width=box_width,
+                          marker_color='YellowGreen', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nozone['Task503_Glacier_mtl_late'], name='Late Zone<br> MTL', width=box_width,
+                          marker_color='#e1e400', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nozone['Task505_Glacier_mtl_boundary'], name='Boundary <br>Zone MTL', width=box_width,
+                          marker_color='gold', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nozone['Task500_Glacier_zonefronts'], name='Fused Labels <br> Zone', width=box_width,
+                          marker_color='orange', pointpos=0, boxpoints='all', boxmean=True))
+
+     fig.update_layout(showlegend=False, font=dict(family="Times New Roman", size=18))
+     fig.update_yaxes(title='Front delineation error (m)')
+     # fig.show()
+     fig.write_image("output/results.pdf", format='pdf')
create_plots_new/SegmentationMetrics.py ADDED
@@ -0,0 +1,65 @@
+ import numpy as np
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import plotly.io as pio
+ import os
+ pio.kaleido.scope.mathjax = None
+ import json
+
+
+ if __name__ == '__main__':
+     experiments = ['Task501_Glacier_front',
+                    'Task502_Glacier_zone',
+                    'Task503_Glacier_mtl_early',
+                    'Task503_Glacier_mtl_late',
+                    'Task505_Glacier_mtl_boundary',
+                    'Task500_Glacier_zonefronts_nodeep',
+                    'Task500_Glacier_zonefronts'
+                    ]
+     data_dir = '/home/ho11laqe/Desktop/nnUNet_results/Final_Eval/'
+
+     for metric in ['Precision', 'Recall', 'F1', 'IoU']:
+
+         zone_metric = {}
+         for experiment in experiments:
+
+             zone_metric_exp = []
+             # nofront[experiment] = {'Front': [], 'Zone': []}
+             for fold in range(5):
+                 # load json file with results
+                 results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'pngs', 'eval_results.json')
+                 if not os.path.exists(results_json_path):
+                     results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'eval_results.json')
+
+                 with open(results_json_path, 'r') as f:
+                     result = json.load(f)
+
+                 if 'Zone_Segmentation' in result.keys():
+                     avg_metric = 'Average_' + metric
+                     if metric == 'F1':
+                         avg_metric = 'Average_' + metric + ' Score'
+                     zone_metric_exp.append(result['Zone_Segmentation']['Zone_' + metric][avg_metric])
+                 else:
+                     zone_metric_exp.append(0)
+
+             zone_metric[experiment] = zone_metric_exp
+
+         box_width = 0.8
+         fig = px.box(None, points="all", template="plotly_white", width=700, height=500)
+
+         fig.add_trace(go.Box(y=zone_metric['Task502_Glacier_zone'], name='Zone<br> STL', width=box_width,
+                              line_color='LightBlue', pointpos=0, boxpoints='all', boxmean=True))
+         fig.add_trace(go.Box(y=zone_metric['Task503_Glacier_mtl_early'], name='Early Zone <br>MTL', width=box_width,
+                              line_color='YellowGreen', pointpos=0, boxpoints='all', boxmean=True))
+         fig.add_trace(go.Box(y=zone_metric['Task503_Glacier_mtl_late'], name='Late Zone<br> MTL', width=box_width,
+                              line_color='#e1e400', pointpos=0, boxpoints='all', boxmean=True))
+         fig.add_trace(go.Box(y=zone_metric['Task505_Glacier_mtl_boundary'], name='Boundary<br>Zone MTL', width=box_width,
+                              line_color='gold', pointpos=0, boxpoints='all', boxmean=True))
+         fig.add_trace(go.Box(y=zone_metric['Task500_Glacier_zonefronts'], name='Fused Labels<br>Front', width=box_width,
+                              line_color='orange', pointpos=0, boxpoints='all', boxmean=True))
+
+         fig.update_layout(showlegend=False, font=dict(family="Times New Roman", size=18))
+         fig.update_yaxes(title=metric)
+         # fig.show()
+         fig.write_image('output/' + metric + ".pdf", format='pdf')
create_plots_new/area_change.py ADDED
@@ -0,0 +1,92 @@
+ import cv2
+ import os
+ import plotly.express as px
+ import numpy as np
+ import pandas as pd
+ from plotly.subplots import make_subplots
+ import plotly.io as pio
+ pio.kaleido.scope.mathjax = None
+
+
+ def distribute_glacier(list_of_samples):
+     list_of_glaciers = {}
+     for glacier in ['COL', 'Mapple', 'Crane', 'Jorum', 'DBE', 'SI', 'JAC']:
+         list_of_glaciers[glacier] = [sample for sample in list_of_samples if glacier in sample]
+     return list_of_glaciers
+
+
+ if __name__ == '__main__':
+     generate_data = True
+     if generate_data:
+         # directories with zone label
+         train_dir = '/home/ho11laqe/PycharmProjects/data_raw/zones/train'
+         test_dir = '/home/ho11laqe/PycharmProjects/data_raw/zones/test'
+
+         list_of_train_samples = []
+         for sample in os.listdir(train_dir):
+             list_of_train_samples.append(os.path.join(train_dir, sample))
+
+         list_of_test_samples = []
+         for sample in os.listdir(test_dir):
+             list_of_test_samples.append(os.path.join(test_dir, sample))
+
+         list_of_samples = list_of_train_samples + list_of_test_samples
+
+         list_of_glacier = distribute_glacier(list_of_samples)
+
+         fig = make_subplots(rows=len(list_of_glacier.keys()), cols=1)
+         nan = []
+         rock = []
+         ice = []
+         ocean = []
+         date = []
+         glacier_name = []
+         for i, glacier in enumerate(list_of_glacier.keys()):
+
+             for sample in list_of_glacier[glacier]:
+                 print(sample)
+                 seg_mask = cv2.imread(sample, cv2.IMREAD_GRAYSCALE)
+                 all_pixel = seg_mask.shape[0] * seg_mask.shape[1]
+                 nan.append(np.count_nonzero(seg_mask == 0) / all_pixel * 100)
+                 rock.append(np.count_nonzero(seg_mask == 64) / all_pixel * 100)
+                 ice.append(np.count_nonzero(seg_mask == 127) / all_pixel * 100)
+                 ocean.append(np.count_nonzero(seg_mask == 254) / all_pixel * 100)
+
+                 sample_split = sample.split('_')
+                 date.append(sample_split[-6])
+                 glacier_name.append(glacier)
+
+         df = pd.DataFrame(dict(Shadow=nan, Rock=rock, Glacier=ice, Ocean=ocean, date=date, glacier_name=glacier_name))
+         df.to_csv('output/area.csv')
+
+     else:
+         df = pd.read_csv('output/area.csv')
+
+     df = df.drop_duplicates(subset=['date', 'glacier_name'])
+     area_plot = px.area(df,
+                         x="date",
+                         y=["Rock", "Shadow", "Glacier", "Ocean"],
+                         color_discrete_map={"Shadow": 'black', "Ocean": 'blue', "Glacier": "aliceblue", "Rock": "gray"},
+                         template="plotly_white",
+                         height=700,
+                         width=600,
+                         facet_row='glacier_name',
+                         # key must match the facet_row column, otherwise the ordering is silently ignored
+                         category_orders={'glacier_name': ['COL', 'Mapple', 'Crane', 'Jorum', 'DBE', 'SI', 'JAC']}
+                         )
+     area_plot.update_yaxes(type='linear', range=[0, 100], ticksuffix='%', title='area', side='right')
+     area_plot.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1], textangle=0, x=0, xanchor='right'))
+     area_plot.update_layout(legend=dict(title='Area:',
+                                         orientation="h",
+                                         yanchor="bottom",
+                                         y=1.02,
+                                         xanchor="right",
+                                         x=1,
+                                         font=dict(size=12)),
+                             margin=dict(l=70, r=0, t=0, b=0)
+                             )
+     area_plot.for_each_yaxis(lambda a: a.update(title=''))
+     area_plot.update_xaxes(title=' ', tickfont=dict(size=12))
+     area_plot.update_layout(font=dict(family="Times New Roman", size=10))
+     area_plot.update_annotations(font=dict(size=12))
+     area_plot.write_image("output/area.pdf", format='pdf')
+     # fig.show()
create_plots_new/canny_edge.py ADDED
@@ -0,0 +1,157 @@
+ from scipy import ndimage
+ from scipy.ndimage.filters import convolve
+
+ from scipy import misc
+ import numpy as np
+ import cv2
+
+
+ class cannyEdgeDetector:
+     def __init__(self, imgs, sigma=5, kernel_size=10, weak_pixel=75, strong_pixel=255, lowthreshold=0.05,
+                  highthreshold=0.15):
+         self.imgs = imgs
+         self.imgs_final = []
+         self.img_smoothed = None
+         self.gradientMat = None
+         self.thetaMat = None
+         self.nonMaxImg = None
+         self.thresholdImg = None
+         self.weak_pixel = weak_pixel
+         self.strong_pixel = strong_pixel
+         self.sigma = sigma
+         self.kernel_size = kernel_size
+         self.lowThreshold = lowthreshold
+         self.highThreshold = highthreshold
+
+     def gaussian_kernel(self, size, sigma=1):
+         size = int(size) // 2
+         x, y = np.mgrid[-size:size + 1, -size:size + 1]
+         normal = 1 / (2.0 * np.pi * sigma ** 2)
+         g = np.exp(-((x ** 2 + y ** 2) / (2.0 * sigma ** 2))) * normal
+         return g
+
+     def sobel_filters(self, img):
+         Kx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], np.float32)
+         Ky = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]], np.float32)
+
+         Ix = ndimage.filters.convolve(img, Kx)
+         Iy = ndimage.filters.convolve(img, Ky)
+
+         G = np.hypot(Ix, Iy)
+         G = G / G.max() * 255
+         theta = np.arctan2(Iy, Ix)
+         return (G, theta, Ix, Iy)
+
+     def non_max_suppression(self, img, D):
+         M, N = img.shape
+         Z = np.zeros((M, N), dtype=np.int32)
+         angle = D * 180. / np.pi
+         angle[angle < 0] += 180
+
+         for i in range(1, M - 1):
+             for j in range(1, N - 1):
+                 try:
+                     q = 255
+                     r = 255
+
+                     # angle 0
+                     if (0 <= angle[i, j] < 22.5) or (157.5 <= angle[i, j] <= 180):
+                         q = img[i, j + 1]
+                         r = img[i, j - 1]
+                     # angle 45
+                     elif 22.5 <= angle[i, j] < 67.5:
+                         q = img[i + 1, j - 1]
+                         r = img[i - 1, j + 1]
+                     # angle 90
+                     elif 67.5 <= angle[i, j] < 112.5:
+                         q = img[i + 1, j]
+                         r = img[i - 1, j]
+                     # angle 135
+                     elif 112.5 <= angle[i, j] < 157.5:
+                         q = img[i - 1, j - 1]
+                         r = img[i + 1, j + 1]
+
+                     if (img[i, j] >= q) and (img[i, j] >= r):
+                         Z[i, j] = img[i, j]
+                     else:
+                         Z[i, j] = 0
+
+                 except IndexError as e:
+                     pass
+
+         return Z
+
+     def threshold(self, img):
+
+         highThreshold = img.max() * self.highThreshold
+         lowThreshold = highThreshold * self.lowThreshold
+
+         M, N = img.shape
+         res = np.zeros((M, N), dtype=np.int32)
+
+         weak = np.int32(self.weak_pixel)
+         strong = np.int32(self.strong_pixel)
+
+         strong_i, strong_j = np.where(img >= highThreshold)
+         zeros_i, zeros_j = np.where(img < lowThreshold)
+
+         weak_i, weak_j = np.where((img <= highThreshold) & (img >= lowThreshold))
+
+         res[strong_i, strong_j] = strong
+         res[weak_i, weak_j] = weak
+
+         return res
+
+     def hysteresis(self, img):
+
+         M, N = img.shape
+         weak = self.weak_pixel
+         strong = self.strong_pixel
+
+         for i in range(1, M - 1):
+             for j in range(1, N - 1):
+                 if img[i, j] == weak:
+                     try:
+                         if ((img[i + 1, j - 1] == strong) or (img[i + 1, j] == strong) or (img[i + 1, j + 1] == strong)
+                                 or (img[i, j - 1] == strong) or (img[i, j + 1] == strong)
+                                 or (img[i - 1, j - 1] == strong) or (img[i - 1, j] == strong)
+                                 or (img[i - 1, j + 1] == strong)):
+                             img[i, j] = strong
+                         else:
+                             img[i, j] = 0
+                     except IndexError as e:
+                         pass
+
+         return img
+
+     def detect(self):
+         imgs_final = []
+         for i, img in enumerate(self.imgs):
+             cv2.imwrite('output/0img.png', img)
+             self.img_smoothed = convolve(img, self.gaussian_kernel(self.kernel_size, self.sigma))
+             self.img_smoothed = self.img_smoothed / np.max(self.img_smoothed) * 255
+             cv2.imwrite('output/1smoothed.png', self.img_smoothed)
+             self.gradientMat, self.thetaMat, Ix, Iy = self.sobel_filters(self.img_smoothed)
+             cv2.imwrite('output/2Ix.png', Ix)
+             cv2.imwrite('output/2Iy.png', Iy)
+             cv2.imwrite('output/4deltaI.png', self.gradientMat.astype(float))
+             cv2.imwrite('output/5theta.png', self.thetaMat.astype(float) / np.max(self.thetaMat) * 255)
+             self.nonMaxImg = self.non_max_suppression(self.gradientMat, self.thetaMat)
+             cv2.imwrite('output/6nonmax.png', self.nonMaxImg)
+             self.thresholdImg = self.threshold(self.nonMaxImg)
+             cv2.imwrite('output/7threshold.png', self.thresholdImg)
+             img_final = self.hysteresis(self.thresholdImg)
+             self.imgs_final.append(img_final)
+
+         return self.imgs_final
+
+
+ if __name__ == '__main__':
+     image_path = '/home/ho11laqe/PycharmProjects/data_raw/sar_images/test/Mapple_2011-06-02_TSX_7_1_110.png'
+
+     img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)[:1000, -1000:].astype(np.float32)
+     detector = cannyEdgeDetector([img], sigma=20)
+     edge = detector.detect()
+     cv2.imwrite('output/8edge.png', edge[0])
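The class above reimplements the textbook Canny pipeline (Gaussian smoothing, Sobel gradients, non-maximum suppression, double thresholding, hysteresis). The per-pixel Python loops in non_max_suppression and hysteresis are much slower than cv2.Canny, but this from-scratch version writes every intermediate stage to output/ as a numbered PNG, which appears to be the point of keeping it for the plots.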
create_plots_new/compute_significance.py ADDED
@@ -0,0 +1,94 @@
+ import numpy as np
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import plotly.io as pio
+ pio.kaleido.scope.mathjax = None
+ import os
+ import json
+
+
+ if __name__ == '__main__':
+
+     experiments = ['Task501_Glacier_front',
+                    'Task502_Glacier_zone',
+                    'Task503_Glacier_mtl_early',
+                    'Task503_Glacier_mtl_late',
+                    'Task505_Glacier_mtl_boundary',
+                    'Task500_Glacier_zonefronts']
+     data_dir = '/home/ho11laqe/Desktop/nnUNet_results/Final_Eval/'
+
+     zone_mean = {}
+     front_mean = {}
+     for experiment in experiments:
+         print(experiment)
+         zone_mean_exp = []
+         front_mean_exp = []
+         # nofront[experiment] = {'Front': [], 'Zone': []}
+         for fold in range(5):
+             # load json file with results
+             results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'pngs',
+                                              'eval_results.json')
+             if not os.path.exists(results_json_path):
+                 results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'eval_results.json')
+
+             with open(results_json_path, 'r') as f:
+                 result = json.load(f)
+
+             if 'Front_Delineation' in result.keys():
+                 front_mean_exp.append(result['Front_Delineation']['Result_all']['mean'])
+             else:
+                 front_mean_exp.append(0)
+
+             if 'Zone_Delineation' in result.keys():
+                 zone_mean_exp.append(result['Zone_Delineation']['Result_all']['mean'])
+             else:
+                 zone_mean_exp.append(0)
+
+         print(np.mean(zone_mean_exp), np.std(zone_mean_exp))
+         print(np.mean(front_mean_exp), np.std(front_mean_exp))
+         zone_mean[experiment] = zone_mean_exp
+         front_mean[experiment] = front_mean_exp
+
+     for exp1 in experiments:
+         for exp2 in experiments:
+             # FRONT
+             mean1 = np.mean(front_mean[exp1])
+             var1 = np.var(front_mean[exp1])
+             mean2 = np.mean(front_mean[exp2])
+             var2 = np.var(front_mean[exp2])
+
+             T_front = abs(mean1 - mean2) / np.sqrt((var1 / 5) + (var2 / 5))
+             print(exp1 + '<>' + exp2)
+             print('Tfront:' + str(T_front))
+
+             # Zone
+             mean1 = np.mean(zone_mean[exp1])
+             var1 = np.var(zone_mean[exp1])
+             mean2 = np.mean(zone_mean[exp2])
+             var2 = np.var(zone_mean[exp2])
+
+             T_zone = abs(mean1 - mean2) / np.sqrt((var1 / 5) + (var2 / 5))
+             print('Tzone:' + str(T_zone))
+             print('')
+     """
+     box_width = 0.8
+     fig = px.box(None, points="all", template="plotly_white", width=600, height=500)
+
+     fig.add_trace(go.Box(y=zone_mean['Task502_Glacier_zone'], name='Zone<br> STL', width=box_width,
+                          line_color='black', fillcolor='LightBlue', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=zone_mean['Task503_Glacier_mtl_early'], name='Early Zone <br>MTL', width=box_width,
+                          line_color='black', fillcolor='YellowGreen', pointpos=0, boxpoints='all',
+                          boxmean=True))
+     fig.add_trace(go.Box(y=zone_mean['Task503_Glacier_mtl_late'], name='Late Zone<br> MTL', width=box_width,
+                          line_color='black', fillcolor='#e1e400', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(
+         go.Box(y=zone_mean['Task505_Glacier_mtl_boundary'], name='Boundary <br>Zone MTL', width=box_width,
+                line_color='black', fillcolor='gold', pointpos=0, boxpoints='all', boxmean=True))
+
+     fig.update_layout(showlegend=False, font=dict(family="Times New Roman", size=18))
+     fig.update_yaxes(title='Front mean')
+     # fig.show()
+     fig.write_image('Front mean' + ".pdf", format='pdf')
+     """
create_plots_new/create_train_gif.py ADDED
@@ -0,0 +1,158 @@
+ import imageio
+ from skimage import io
+ import skimage
+
+ import os
+ from PIL import Image, ImageDraw, ImageFont, ImageOps
+ import copy
+
+ from datetime import date
+ import numpy as np
+ from argparse import ArgumentParser
+ from skimage.transform import resize
+ # import matplotlib.pyplot as plt
+ import cv2
+
+
+ def color_map(m):
+     return m[0] * np.array([1, 1, 1]) + (255 - m[0]) * np.array([0, 0, 1])
+
+
+ def createOverlay(image, front, zone, boundary):
+     """
+     Creates an image with the front label overlaying the glacier image.
+
+     :param image: Image of the glacier
+     :param front: Image of the label of the front
+     :param zone: Image of the zone label
+     :param boundary: Image of the boundary label (its use is commented out below)
+     :return: an rgb image with the black and white image and red front line
+     """
+
+     # value for NA area=0, stone=64, glacier=127, ocean with ice melange=254
+
+     image_rgb = np.array(image * 0.5, dtype=np.uint8)
+
+     try:
+         image_rgb[zone == 0] += np.array(np.array([0, 0, 0]) / 2, dtype=np.uint8)
+         image_rgb[zone == 64] += np.array(np.array([52, 46, 55]) / 2, dtype=np.uint8)
+         image_rgb[zone == 127] += np.array(np.array([254, 254, 254]) / 2, dtype=np.uint8)
+         image_rgb[zone == 254] += np.array(np.array([60, 145, 230]) / 2, dtype=np.uint8)
+
+     finally:
+         # try:
+         #     image_rgb[boundary > 0] = np.array(np.array([241, 143, 1]), dtype=np.uint8)
+         # finally:
+         image_rgb[front == 255] = np.array(np.array([255, 0, 0]), dtype=np.uint8)
+
+     return image_rgb
+
+
+ def create_target(sar_image_path):
+     sample_name = sar_image_path.split('/')[-1]
+     sar_image = cv2.imread(sar_image_path)
+     front_image_path = '/home/ho11laqe/PycharmProjects/data_raw/fronts_dilated_5/train/' + sample_name[:-len('.png')] + '_front.png'
+     zone_image_path = '/home/ho11laqe/PycharmProjects/data_raw/zones/train/' + sample_name[:-len('.png')] + '_zones.png'
+     boundary_image_path = '/home/ho11laqe/PycharmProjects/data_raw/boundaries_dilated_5/train/' + sample_name[:-len('.png')] + '_boundary.png'
+     front = cv2.imread(front_image_path, cv2.IMREAD_GRAYSCALE)
+     zone = cv2.imread(zone_image_path, cv2.IMREAD_GRAYSCALE)
+     boundary = cv2.imread(boundary_image_path, cv2.IMREAD_GRAYSCALE)
+     overlay = createOverlay(sar_image, front, zone, boundary)
+     cv2.imwrite('output/target.png', cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR))
+
+
+ if __name__ == '__main__':
+     parser = ArgumentParser(add_help=False)
+     parser.add_argument('--image_dir', help="Directory with predictions as png")
+     args = parser.parse_args()
+
+     image_dir = args.image_dir
+
+     front_gif = []
+     fronts = []
+     zone_gif = []
+     zones = []
+     boundary_gif = []
+     boundaries = []
+
+     sar_image_path = '/home/ho11laqe/PycharmProjects/data_raw/sar_images/train/DBE_2008-03-30_TSX_7_3_049.png'
+     sar_image = cv2.imread(sar_image_path)
+     shape = sar_image.shape
+     new_shape = (int(shape[1] / 4), int(shape[0] / 4))
+     sar_image = cv2.resize(sar_image, new_shape)
+
+     create_target(sar_image_path)
+
+     list_images = os.listdir(image_dir)
+     list_images.sort(key=lambda y: int(y.split('_')[6]))
+
+     for i, image_file in enumerate(list_images[:300]):
+         epoch = image_file.split('_')[6]
+         if image_file.endswith('_front.png'):
+             print(image_file)
+             front = cv2.imread(image_dir + '/' + image_file, cv2.IMREAD_GRAYSCALE)
+             front = cv2.resize(front, new_shape, interpolation=cv2.INTER_NEAREST)
+             # image = Image.fromarray(front)
+             # image_draw = ImageDraw.Draw(image)
+             # image_draw.text((1, 1), 'Epoch: ' + str(epoch))
+             # front_gif.append(image)
+             fronts.append(front)
+         elif image_file.endswith('_zone.png'):
+             print(image_file)
+             zone = cv2.imread(image_dir + '/' + image_file, cv2.IMREAD_GRAYSCALE)
+             zone = cv2.resize(zone, new_shape, interpolation=cv2.INTER_NEAREST)
+             # image = Image.fromarray(zone)
+             # image_draw = ImageDraw.Draw(image)
+             # image_draw.text((1, 1), 'Epoch: ' + str(epoch))
+             # zone_gif.append(image)
+             zones.append(zone)
+         elif image_file.endswith('_boundary.png'):
+             print(image_file)
+             boundary = cv2.imread(image_dir + '/' + image_file, cv2.IMREAD_GRAYSCALE)
+             boundary = cv2.resize(boundary, new_shape, interpolation=cv2.INTER_NEAREST)
+             # image = Image.fromarray(boundary)
+             # image_draw = ImageDraw.Draw(image)
+             # image_draw.text((1, 1), 'Epoch: ' + str(epoch))
+             # boundary_gif.append(image)
+             boundaries.append(boundary)
+
+     font = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf", 40)
+     font_legend = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf", 20)
+     overlay_gif = []
+     for epoch, (front, zone, boundary) in enumerate(zip(fronts, zones, boundaries)):
+         overlay = createOverlay(sar_image, front, zone, boundary)
+         image = Image.fromarray(overlay)
+         image_draw = ImageDraw.Draw(image)
+
+         image_draw.rectangle((0, 40, 195, 210), fill='gray')
+
+         image_draw.rectangle((10, 60, 30, 80), fill=(60, 145, 230, 120))
+         image_draw.text((35, 60), 'Ocean', font=font_legend)
+
+         image_draw.rectangle((10, 90, 30, 110), fill=(255, 255, 255))
+         image_draw.text((35, 90), 'Glacier', font=font_legend)
+
+         image_draw.rectangle((10, 120, 30, 140), fill=(255, 0, 0))
+         image_draw.text((35, 120), 'Glacier Front', font=font_legend)
+
+         image_draw.rectangle((10, 150, 30, 170), fill=(92, 76, 85))
+         image_draw.text((35, 150), 'Rock', font=font_legend)
+
+         image_draw.rectangle((10, 180, 30, 200), fill=(0, 0, 0))
+         image_draw.text((35, 180), 'Shadow', font=font_legend)
+
+         image_draw.rectangle((0, 0, 330, 45), fill='gray')
+         image_draw.text((8, 1), 'Epoch:%03i' % epoch + '/' + str(len(fronts)), font=font)
+         # repeat the earliest frames so the GIF lingers at the start
+         if epoch < 10:
+             for i in range(10 - epoch):
+                 print(i)
+                 overlay_gif.append(image)
+         else:
+             overlay_gif.append(image)
+
+     frame_one = overlay_gif[0]
+     frame_one.save("output/overlay.gif", format="GIF", append_images=overlay_gif,
+                    save_all=True, duration=200, loop=0)
@@ -0,0 +1,56 @@
 
+ import numpy as np
+ import os
+ import plotly.express as px
+ import plotly.figure_factory as ff
+ import datetime
+ import plotly.io as pio
+
+ pio.kaleido.scope.mathjax = None
+
+
+ def distribute_glacier(list_of_samples):
+     list_of_glaciers = {}
+     for glacier in ['COL', 'Mapple', 'Crane', 'Jorum', 'DBE', 'SI', 'JAC']:
+         list_of_glaciers[glacier] = [sample for sample in list_of_samples if glacier in sample]
+     return list_of_glaciers
+
+
+ def create_dict(list_of_samples):
+     list_dict = []
+     for sample in list_of_samples:
+         sample_split = sample.split('_')
+         finish_date = datetime.datetime.fromisoformat(sample_split[1]) + datetime.timedelta(days=50)
+         sample_dict = {
+             'Glacier': sample_split[0],
+             'Start': sample_split[1],
+             'Finish': str(finish_date),
+             'Satellite:': sample_split[2]
+         }
+         list_dict.append(sample_dict)
+     return list_dict
+
+
+ if __name__ == '__main__':
+     list_of_train_samples = os.listdir('/home/ho11laqe/PycharmProjects/data_raw/fronts/train')
+     list_of_test_samples = os.listdir('/home/ho11laqe/PycharmProjects/data_raw/fronts/test')
+     list_of_samples = list_of_train_samples + list_of_test_samples
+     list_of_glaciers = distribute_glacier(list_of_samples)
+     list_dict = create_dict(list_of_samples)
+
+     fig = px.timeline(list_dict, x_start='Start', x_end='Finish', color="Satellite:", y='Glacier',
+                       color_discrete_sequence=px.colors.qualitative.G10, template="plotly_white",
+                       height=300, category_orders={'Glacier': ['COL', 'Mapple', 'Crane', 'Jorum', 'DBE', 'SI', 'JAC'],
+                                                    'Satellite:': ['ERS', 'RSAT', 'ENVISAT', 'PALSAR', 'TSX', 'TDX',
+                                                                   'S1']})
+     fig.update_layout(legend=dict(
+         orientation="h",
+         yanchor="bottom",
+         y=1.02,
+         xanchor="right",
+         x=1,
+     ),
+         margin=dict(l=0, r=0, t=0, b=0))
+     fig.update_layout(
+         font=dict(family="Computer Modern", size=14))
+     fig.write_image("output/dataset_timeline.pdf", format='pdf')
+     # fig.show()
create_plots_new/front_change.py ADDED
@@ -0,0 +1,228 @@
+ import cv2
+
+ import numpy as np
+ import os
+ import plotly.express as px
+ import plotly.figure_factory as ff
+ import datetime
+ import plotly.io as pio
+ import plotly.graph_objs as go
+
+ pio.kaleido.scope.mathjax = None
+ import math
+ # import pylab
+ from matplotlib.colors import LinearSegmentedColormap
+ from PIL import ImageColor
+
+
+ def distribute_glacier(list_of_samples):
+     list_of_glaciers = {}
+     for glacier in ['JAC']:
+     # for glacier in ['COL', 'Mapple', 'Crane', 'Jorum', 'DBE', 'SI', 'JAC']:
+         list_of_glaciers[glacier] = [sample for sample in list_of_samples if glacier in sample]
+     return list_of_glaciers
+
+
+ def create_dict(list_of_samples):
+     list_dict = []
+     for sample in list_of_samples:
+         sample_split = sample.split('_')
+         finish_date = datetime.datetime.fromisoformat(sample_split[1]) + datetime.timedelta(days=50)
+         sample_dict = {
+             'Glacier': sample_split[0],
+             'Start': sample_split[1],
+             'Finish': str(finish_date),
+             'Satellite:': sample_split[2]
+         }
+         list_dict.append(sample_dict)
+     return list_dict
+
+
+ if __name__ == '__main__':
+     train_dir = '/home/ho11laqe/PycharmProjects/data_raw/fronts/train/'
+     test_dir = '/home/ho11laqe/PycharmProjects/data_raw/fronts/test/'
+
+     list_of_train_samples = os.listdir(train_dir)
+     list_of_test_samples = os.listdir(test_dir)
+     list_of_samples = list_of_train_samples + list_of_test_samples
+     list_of_glaciers = distribute_glacier(list_of_samples)
+     list_dict = create_dict(list_of_samples)
+
+     # define color map
+     colormap = px.colors.sequential.Reds[-1::-1]
+     for glacier in list_of_glaciers:
+         print(glacier)
+         list_of_glaciers[glacier].sort()
+
+         if glacier in ['COL', 'Mapple']:
+             data_directory = test_dir
+             image_directory = '/home/ho11laqe/PycharmProjects/data_raw/sar_images/test/'
+         else:
+             data_directory = train_dir
+             image_directory = '/home/ho11laqe/PycharmProjects/data_raw/sar_images/train/'
+
+         # define SAR background image
+         if glacier == 'COL':
+             canvas = cv2.imread(image_directory + 'COL_2011-11-13_TDX_7_1_092.png')
+             shape = canvas.shape
+
+         elif glacier == 'JAC':
+             canvas = cv2.imread(image_directory + 'JAC_2009-06-21_TSX_6_1_005.png')
+             shape = canvas.shape
+
+         elif glacier == 'Jorum':
+             canvas = cv2.imread(image_directory + 'Jorum_2011-09-04_TSX_7_4_034.png')
+             shape = canvas.shape
+
+         elif glacier == 'Mapple':
+             canvas = cv2.imread(image_directory + 'Mapple_2008-10-13_TSX_7_2_034.png')
+             shape = canvas.shape
+
+         elif glacier == 'SI':
+             canvas = cv2.imread(image_directory + 'SI_2013-08-14_TSX_7_1_125.png')
+
+         elif glacier == 'Crane':
+             canvas = cv2.imread(image_directory + 'Crane_2008-10-13_TSX_7_3_034.png')
+
+         elif glacier == 'DBE':
+             canvas = cv2.imread(image_directory + 'DBE_2008-03-30_TSX_7_3_049.png')
+
+         else:
+             print('No image for background')
+             exit()
+
+         number_images = len(list_of_glaciers[glacier])
+         kernel = np.ones((3, 3), np.uint8)
+
+         # iterate over all fronts of one glacier
+         for i, image_name in enumerate(list_of_glaciers[glacier]):
+             front = cv2.imread(data_directory + image_name)
+
+             # if the front label has to be resized to fit the background image,
+             # the front is not dilated
+             if front.shape != canvas.shape:
+                 front = cv2.resize(front, (shape[1], shape[0]))
+             else:
+                 front = cv2.dilate(front, kernel)
+
+             # color interpolation based on position in dataset
+             # TODO based on actual date
+             index = (1 - i / number_images) * (len(colormap) - 1)
+             up = math.ceil(index)
+             down = up - 1
+             color_up = np.array(ImageColor.getcolor(colormap[up], 'RGB'))
+             color_down = np.array(ImageColor.getcolor(colormap[down], 'RGB'))
+             # fractional distance to the upper colormap entry
+             # (the original `dif = up - down` is always 1, which would discard color_up)
+             dif = up - index
+             color = color_up * (1 - dif) + color_down * dif
+
+             # draw front on canvas
+             non_zeros = np.nonzero(front)
+             canvas[non_zeros[:2]] = np.uint([color for _ in non_zeros[0]])
+
+         # scale reference for font size
+         ref_x = 15000 / 7
+
+         if glacier == 'COL':
+             image = canvas[750:, 200:2800]
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(80 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.4, 4.4],
+                           ticktext=['2011<br>(+0.8°C)', '2020<br>(+1.2°C)'],
+                           outlinewidth=0)
+
+         elif glacier == 'Mapple':
+             image = canvas
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(150 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.8, 4.8], ticktext=['2006', '2020 '],
+                           outlinewidth=0)
+
+         elif glacier == 'Crane':
+             image = canvas[:2500, :]
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(150 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.8, 4.8], ticktext=['2002', '2014'],
+                           outlinewidth=0)
+
+         elif glacier == 'Jorum':
+             image = canvas  # [200:1600, 1500:]
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(240 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.8, 4.8], ticktext=['2003', '2020'],
+                           outlinewidth=0)
+
+         elif glacier == 'DBE':
+             image = canvas[700:, 750:]
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(150 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.7, 4.7], ticktext=['1995', '2014'],
+                           outlinewidth=0)
+
+         elif glacier == 'SI':
+             image = canvas
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[0] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(240 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.8, 4.8], ticktext=['1995', '2014'],
+                           outlinewidth=0)
+
+         elif glacier == 'JAC':
+             image = canvas[:, :]
+             new_shape = image.shape
+             res = 6
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(340 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.6, 4.7],
+                           ticktext=['2009<br>(+0.7°C)', '2015<br>(+0.9°C)'],
+                           outlinewidth=0)
+         else:
+             fig = px.imshow(canvas)
+             res = 7
+             scale = 1
+
+         colorbar_trace = go.Scatter(x=[None],
+                                     y=[None],
+                                     mode='markers',
+                                     marker=dict(
+                                         colorscale=colormap[::-1],
+                                         showscale=True,
+                                         cmin=-5,
+                                         cmax=5,
+                                         colorbar=legend
+                                     ),
+                                     hoverinfo='none'
+                                     )
+         fig.update_layout(yaxis=dict(tickmode='array',
+                                      tickvals=[0, 5000 / res, 10000 / res, 15000 / res, 20000 / res, 25000 / res],
+                                      ticktext=[0, 5, 10, 15, 20, 25],
+                                      title='km'))
+         fig.update_layout(xaxis=dict(tickmode='array',
+                                      tickvals=[0, 5000 / res, 10000 / res, 15000 / res, 20000 / res, 25000 / res],
+                                      ticktext=[0, 5, 10, 15, 20, 25],
+                                      title='km'))
+
+         fig.update_xaxes(tickfont=dict(size=int(40 * scale)))
+         fig.update_yaxes(tickfont=dict(size=int(40 * scale)))
+         fig.update_layout(font=dict(size=int(60 * scale), family="Computer Modern"))
+         fig.update_coloraxes(colorbar_x=0)
+         fig['layout']['xaxis']['title']['font']['size'] = int(60 * scale)
+         fig['layout']['yaxis']['title']['font']['size'] = int(60 * scale)
+
+         fig['layout']['showlegend'] = False
+         fig.add_trace(colorbar_trace)
+         fig.write_image('output/' + glacier + "_front_change.pdf", format='pdf')
+         # fig.show()
data_processing/data_postprocessing.py ADDED
@@ -0,0 +1,323 @@
1
+ import numpy as np
2
+ import os
3
+ import re
4
+ import pickle
5
+ import cv2
6
+ from einops import rearrange
7
+ from scipy.ndimage.filters import gaussian_filter
8
+ import skimage.measure
9
+ import skimage.color
10
+ from skimage.morphology import skeletonize
11
+ from fil_finder import FilFinder2D
12
+ import astropy.units as u
13
+
14
+
15
+ # ################################################################################################################
16
+ # POSTPROCESSING PUTS THE PATCHES TOGETHER, SUBSTRACTS THE PADDING
17
+ # AND CHOOSES THE CLASS WITH HIGHEST PROBABILITY AS PREDICTION.
18
+ # SECONDLY, THE FRONT LINE IS EXTRACTED FROM THE PREDICTION
19
+ # ################################################################################################################
20
+
21
+
22
+ def is_subarray(subarray, arr):
23
+ """
24
+ Test whether subarray is a subset of arr
25
+ :param subarray: list of numbers
26
+ :param arr: list of numbers
27
+ :return: boolean
28
+ """
29
+ count = 0
30
+ for element in subarray:
31
+ if element in arr:
32
+ count += 1
33
+ if count == len(subarray):
34
+ return True
35
+ return False
36
+
37
+
38
+ def reconstruct_from_patches_and_binarize(src_directory, dst_directory, modality, threshold_front_prob):
39
+ """
40
+ Reconstruct the image from patches in src_directory and store them in dst_directory.
41
+ The src_directory contains masks (patches = number_of_classes x height x width).
42
+ The class with maximum probability will be chosen as prediction after averaging the probabilities across patches
43
+ (if there is an overlap) and the image in dst_directory will only show the prediction (image = height x width)
44
+ :param src_directory: source directory which contains pickled patches (class x height x width)
45
+ :param dst_directory: destination directory
46
+ :param modality: Either "fronts" or "zones"
47
+ :return: prediction (image = height x width)
48
+ """
49
+
50
+ assert modality == "fronts" or modality == "zones", "Modality must either be 'fronts' or 'zones'."
51
+
52
+ patches = os.listdir(src_directory)
53
+ list_of_names = []
54
+ for patch_name in patches:
55
+ list_of_names.append(os.path.split(patch_name)[1].split("__")[0])
56
+ image_names = set(list_of_names)
57
+ for name in image_names:
58
+ print(f"File: {name}")
59
+ # #####################################################################################################
60
+ # Search all patches that belong to the image with the name "name"
61
+ # #####################################################################################################
62
+ pattern = re.compile(name)
63
+ patches_for_image_names = [a for a in patches if pattern.match(a)]
64
+ assert len(patches_for_image_names) > 0, "No patches found for image " + name
65
+ patches_for_image = [] # Will be Number_Of_Patches x Number_Of_Classes x Height x Width
66
+ irow = []
67
+ icol = []
68
+ padded_bottom = int(patches_for_image_names[0][:-4].split("_")[-5])
69
+ padded_right = int(patches_for_image_names[0][:-4].split("_")[-4])
70
+
71
+ for file_name in patches_for_image_names:
72
+ # #####################################################################################################
73
+ # Get the origin of the patches out of their names
74
+ # #####################################################################################################
75
+ # naming convention: nameOfTheOriginalImage__PaddedBottom_PaddedRight_NumberOfPatch_irow_icol.png
76
+
77
+ # Mask patches are 3D arrays with class probabilities
78
+ with open(os.path.join(src_directory, file_name), "rb") as fp:
79
+ class_probabilities_array = pickle.load(fp)
80
+ assert class_probabilities_array.ndim == 3, "Patch " + file_name + " has not enough dimensions (3 needed). Found: " + str(class_probabilities_array.ndim)
81
+ if modality == "fronts":
82
+ assert len(class_probabilities_array) <= 2, "Patch " + file_name + " has too many classes (<=2 needed). Found: " + str(len(class_probabilities_array))
83
+ else:
84
+ assert len(class_probabilities_array) <= 4, "Patch " + file_name + " has too many classes (<=4 needed). Found: " + str(len(class_probabilities_array))
85
+ patches_for_image.append(class_probabilities_array)
86
+ irow.append(int(os.path.split(file_name)[1][:-4].split("_")[-2]))
87
+ icol.append(int(os.path.split(file_name)[1][:-4].split("_")[-1]))
88
+
89
+ # Images are masks and store the probabilities for each class (patch = number_class x height x width)
90
+ class_patches_for_image = []
91
+ patches_for_image = [np.array(x) for x in patches_for_image]
92
+ patches_for_image = np.array(patches_for_image)
93
+ for class_layer in range(len(patches_for_image[0])):
94
+ class_patches_for_image.append(patches_for_image[:, class_layer, :, :])
95
+
96
+ class_probabilities_complete_image = []
97
+
98
+ # #####################################################################################################
99
+ # Reconstruct image (with number of channels = classes) from patches
100
+ # #####################################################################################################
101
+ for class_number in range(len(class_patches_for_image)):
102
+ class_probability_complete_image, _ = reconstruct_from_grayscale_patches_with_origin(class_patches_for_image[class_number],
103
+ origin=(irow, icol), use_gaussian=True)
104
+ class_probabilities_complete_image.append(class_probability_complete_image)
105
+
106
+ ######################################################################################################
107
+ # Cut Padding
108
+ ######################################################################################################
109
+ if modality == "zones":
110
+ class_probabilities_complete_image = np.array(class_probabilities_complete_image)
111
+ # 'or None' keeps the full axis if the padding is 0 (':-0' would give an empty array)
+ class_probabilities_complete_image = class_probabilities_complete_image[:, :-padded_bottom or None, :-padded_right or None]
112
+ else:
113
+ class_probabilities_complete_image = rearrange(class_probabilities_complete_image, '1 h w -> h w')
114
+ class_probabilities_complete_image = np.array(class_probabilities_complete_image)
115
+ class_probabilities_complete_image = class_probabilities_complete_image[:-padded_bottom or None, :-padded_right or None]
116
+
117
+ # #####################################################################################################
118
+ # Get prediction from probabilities
119
+ # #####################################################################################################
120
+ if modality == "zones":
121
+ # Choose class with highest probability as prediction
122
+ prediction = np.argmax(class_probabilities_complete_image, axis=0)
123
+ else:
124
+ # Take a threshold to get the class
125
+ prediction = class_probabilities_complete_image
126
+ prediction[prediction > threshold_front_prob] = 1
127
+ prediction[prediction <= threshold_front_prob] = 0
128
+
129
+ # #####################################################################################################
130
+ # Convert [0, 1] to [0, 255] range
131
+ # #####################################################################################################
132
+ if modality == "fronts":
133
+ prediction[prediction == 0] = 0
134
+ prediction[prediction == 1] = 255
135
+ assert (is_subarray(np.unique(prediction), [0, 255])), "Unique front values are not correct"
136
+ else:
137
+ prediction[prediction == 0] = 0
138
+ prediction[prediction == 1] = 64
139
+ prediction[prediction == 2] = 127
140
+ prediction[prediction == 3] = 254
141
+ assert (is_subarray(np.unique(prediction), [0, 64, 127, 254])), "Unique zone values are not correct"
142
+
143
+ cv2.imwrite(os.path.join(dst_directory, name + '.png'), prediction)
144
+
145
+
146
+ def get_gaussian(patch_size, sigma_scale=1. / 8) -> np.ndarray:
147
+ """
148
+ Returns a Gaussian importance map the size of the patch, with sigma determined by sigma_scale
149
+ :param patch_size: The size of the image patches -> gaussian importance map will have the same size
150
+ :param sigma_scale: A scaling factor
151
+ :return: Gaussian importance map
152
+ """
153
+ tmp = np.zeros(patch_size)
154
+ center_coords = [i // 2 for i in patch_size]
155
+ sigmas = [i * sigma_scale for i in patch_size]
156
+ tmp[tuple(center_coords)] = 1
157
+ gaussian_importance_map = gaussian_filter(tmp, sigmas, 0, mode='constant', cval=0)
158
+ gaussian_importance_map = gaussian_importance_map / np.max(gaussian_importance_map)
159
+ gaussian_importance_map = gaussian_importance_map.astype(np.float32)
160
+
161
+ # gaussian_importance_map cannot be 0, otherwise we may end up with nans!
162
+ gaussian_importance_map[gaussian_importance_map == 0] = np.min(
163
+ gaussian_importance_map[gaussian_importance_map != 0])
164
+
165
+ return gaussian_importance_map
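+ # Illustrative note: get_gaussian((256, 256)) yields a 256x256 float32 map that peaks
+ # at 1.0 in the patch center and decays towards the borders, so patch centers dominate
+ # the averaging wherever patches overlap.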
166
+
167
+
168
+ def reconstruct_from_grayscale_patches_with_origin(patches, origin, use_gaussian, epsilon=1e-12):
169
+ """Rebuild an image from a set of patches by averaging. The reconstructed image will have different dimensions than
170
+ the original image if the strides and offsets of the patches were changed from the defaults!
171
+ Adopted from: http://jamesgregson.ca/extract-image-patches-in-python.html
172
+ :param patches: (ndarray) input patches as (N,patch_height,patch_width) array
173
+ :param origin: (2-tuple) = row index and column index coordinates of each patch
174
+ :param use_gaussian: Boolean to turn on Gaussian Importance Weighting
175
+ :param epsilon: (scalar) regularization term for the averaging in case some image pixels are not covered by any patch
176
+ :return image, weight
177
+ image (ndarray): output image reconstructed from patches of size (max(origin[0])+patches.shape[1], max(origin[1])+patches.shape[2])
178
+ weight (ndarray): output weight matrix consisting of the count of patches covering each pixel
179
+ """
180
+ patches = np.array(patches)
181
+ origin = np.array(origin)
182
+ patch_height = len(patches[0])
183
+ patch_width = len(patches[0][0])
184
+ img_height = np.max(origin[0]) + patch_height
185
+ img_width = np.max(origin[1]) + patch_width
186
+
187
+ out = np.zeros((img_height, img_width))
188
+ wgt = np.zeros((img_height, img_width))
189
+ if use_gaussian:
190
+ scale_wgt = get_gaussian((patch_height, patch_width))
191
+ else:
192
+ scale_wgt = np.ones((patch_height, patch_width))
193
+
194
+ for i in range(patch_height):
195
+ for j in range(patch_width):
196
+ out[origin[0]+i, origin[1]+j] += patches[:, i, j] * scale_wgt[i, j]
197
+ wgt[origin[0] + i, origin[1] + j] += scale_wgt[i, j]
198
+
199
+ return out / np.maximum(wgt, epsilon), wgt
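+ # Minimal usage sketch (values are made up for illustration): four overlapping
+ # 12x12 patches with origins on an 8-pixel grid reconstruct a 20x20 image.
+ # patches = np.random.rand(4, 12, 12)
+ # image, weight = reconstruct_from_grayscale_patches_with_origin(
+ #     patches, origin=([0, 0, 8, 8], [0, 8, 0, 8]), use_gaussian=True)
+ # image.shape -> (20, 20)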
200
+
201
+
202
+ def postprocess_zone_segmenation(mask):
203
+ """
204
+ Post-process the zone segmentation by filling gaps in the ocean region, clustering the ocean mask and removing all clusters except for the largest, so that a single connected ocean remains.
205
+ :param mask: a numpy array representing the segmentation mask with 1 channel
206
+ :return mask: a numpy array representing the filtered mask with 1 channel
207
+ """
208
+
209
+ # #############################################################################################
210
+ # Fill Gaps in Ocean
211
+ # #############################################################################################
212
+ # get inverted ocean mask
213
+ ocean_mask = mask == 254
214
+ ocean_mask = np.invert(ocean_mask)
215
+ labeled_image, num_cluster = skimage.measure.label(ocean_mask, connectivity=2, return_num=True)
216
+
217
+ # extract largest cluster
218
+ cluster_size = np.zeros(num_cluster + 1)
219
+ for cluster_label in range(1, num_cluster + 1):
220
+ cluster = labeled_image == cluster_label
221
+ cluster_size[cluster_label] = cluster.sum()
222
+
223
+ final_cluster = cluster_size.argmax()
224
+
225
+ # create map of the gaps in ocean area
226
+ gaps_mask = np.zeros_like(labeled_image)
227
+ gaps_mask[labeled_image >= 1] = 1
228
+ gaps_mask[labeled_image == final_cluster] = 0
229
+ # fill gaps
230
+ mask[gaps_mask == 1] = 254
231
+
232
+ # #############################################################################################
233
+ # Take largest connected component of ocean as ocean
234
+ # #############################################################################################
235
+ # Connected Component Analysis
236
+ ocean_mask = mask >= 254 # Ocean (254)
237
+ labeled_image, num_cluster = skimage.measure.label(ocean_mask, connectivity=2, return_num=True)
238
+ if num_cluster == 0:
239
+ return mask
240
+
241
+ # extract largest cluster
242
+ cluster_size = np.zeros(num_cluster + 1) # +1 for background
243
+ for cluster_label in range(1, num_cluster + 1): # +1 as range(x, y) is exclusive for y
244
+ cluster = labeled_image == cluster_label
245
+ cluster_size[cluster_label] = cluster.sum()
246
+
247
+ final_cluster = cluster_size.argmax()
248
+ final_mask = labeled_image == final_cluster
249
+
250
+ # overwrite small ocean cluster (254) with glacier value (127) (it is not important with what value these are
251
+ # filled, as these pixels are not at the boundary between ocean and glacier anymore and hence do not contribute to
252
+ # the front delineation)
253
+ mask[mask == 254] = 127
254
+ mask[final_mask] = 254
255
+
256
+ return mask
257
+
258
+
259
+ def extract_front_from_zones(zone_mask, front_length_threshold):
260
+ """
261
+ Extract the front prediction from the zone segmentation by choosing the boundary between glacier and ocean as the front and deleting fronts that are too short.
262
+ :param zone_mask: zone segmentation prediction
263
+ :param front_length_threshold: Threshold for the deletion of front predictions that are too short
264
+ :return: the front prediction
265
+ """
266
+ # detect edge between ocean and glacier
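+ # the differently padded copies below shift the mask by one pixel in each direction;
+ # combining them marks every ocean pixel (254) that has at least one glacier
+ # pixel (127) among its 4-neighbours as front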
267
+ mask_mi = np.pad(zone_mask, ((1, 1), (1, 1)), mode='constant')
268
+ mask_le = np.pad(zone_mask, ((1, 1), (0, 2)), mode='constant')
269
+ mask_ri = np.pad(zone_mask, ((1, 1), (2, 0)), mode='constant')
270
+ mask_do = np.pad(zone_mask, ((0, 2), (1, 1)), mode='constant')
271
+ mask_up = np.pad(zone_mask, ((2, 0), (1, 1)), mode='constant')
272
+
273
+ front = np.logical_and(mask_mi == 254, np.logical_or.reduce((mask_do == 127, mask_up == 127, mask_ri == 127, mask_le == 127)))
274
+ front = front[1:-1, 1:-1].astype(float)
275
+
276
+ # delete too short fronts
277
+ labeled_front, num_cluster = skimage.measure.label(front, connectivity=2, return_num=True)
278
+ if num_cluster == 0:
279
+ return front * 255
280
+
281
+ for cluster_label in range(1, num_cluster + 1): # +1 as range(x, y) is exclusive for y
282
+ cluster = labeled_front == cluster_label
283
+ cluster_size = cluster.sum()
284
+ if cluster_size <= front_length_threshold:
285
+ front[labeled_front == cluster_label] = 0
286
+ else:
287
+ front[labeled_front == cluster_label] = 1
288
+
289
+ front *= 255
290
+ return front
291
+
292
+
293
+ def postprocess_front_segmenation(complete_predicted_mask, threshold_front_length):
294
+ """
295
+ Post-process the front segmentation by skeletonization, filament extraction, and deletion of fronts that are too short
296
+ :param complete_predicted_mask: front segmentation prediction
297
+ :param threshold_front_length: Threshold for the deletion of front predictions that are too short
298
+ :return: the post-processed front prediction
299
+ """
300
+ if len(np.unique(complete_predicted_mask)) == 1:
301
+ print(f"No front predicted {np.unique(complete_predicted_mask)}")
302
+ return complete_predicted_mask
303
+ skeleton = skeletonize(complete_predicted_mask)
304
+ fil = FilFinder2D(skeleton, distance=None, mask=skeleton)
305
+ fil.preprocess_image(skip_flatten=True)
306
+ fil.create_mask(use_existing_mask=True)
307
+ fil.medskel(verbose=False)
308
+ fil.analyze_skeletons(skel_thresh=5 * u.pix)
309
+ # find longest path through the skeleton and delete all other branches
310
+ skeleton_longpaths = fil.skeleton_longpath
311
+ # delete fronts that are too short
312
+ labeled_skeleton_longpaths, num_cluster = skimage.measure.label(skeleton_longpaths, connectivity=2, return_num=True)
313
+ if num_cluster == 0:
314
+ return skeleton_longpaths
315
+
316
+ for cluster_label in range(1, num_cluster + 1): # +1 as range(x, y) is exclusive for y
317
+ cluster = labeled_skeleton_longpaths == cluster_label
318
+ cluster_size = cluster.sum()
319
+ if cluster_size <= threshold_front_length:
320
+ skeleton_longpaths[labeled_skeleton_longpaths == cluster_label] = 0
321
+ else:
322
+ skeleton_longpaths[labeled_skeleton_longpaths == cluster_label] = 1
323
+ return skeleton_longpaths
documentation/common_problems_and_solutions.md ADDED
@@ -0,0 +1,104 @@
1
+ # Common Issues and their Solutions
2
+
3
+ ## RuntimeError: Expected scalar type half but found float
4
+
5
+ This can happen when running inference (or training) with mixed precision enabled on older GPU hardware. It points
6
+ to some operation not being implemented in half precision for the type of GPU you are using. There are flags to enforce
7
+ the use of fp32 for both nnUNet_predict and nnUNet_train. If you run into this error, using these flags will probably
8
+ solve it. See `nnUNet_predict -h` and `nnUNet_train -h` for what the flags are.
9
+
10
+ ## nnU-Net gets 'stuck' during preprocessing, training or inference
11
+ nnU-Net uses python multiprocessing to leverage multiple CPU cores during preprocessing, background workers for data
12
+ augmentation in training, preprocessing of cases during inference as well as resampling and exporting the final
13
+ predictions during validation and inference. Unfortunately, python (or maybe it is just me as a programmer) is not
14
+ very good at communicating errors that happen in background workers, causing the main process to wait for
15
+ them to return indefinitely.
16
+
17
+ Whenever nnU-Net appears to be stuck, this is what you should do:
18
+
19
+ 1) There is almost always an error message which will give you an indication of what the problem is. This error message
20
+ is often not at the bottom of the text output, but further up. If you run nnU-Net on a GPU cluster (like we do) the
21
+ error message may be WAYYYY off in the log file, sometimes at the very start of the training/inference. Locate the
22
+ error message (if necessary copy the stdout to a text editor and search for 'error')
23
+
24
+ 2) If there is no error message, this could mean that your OS silently killed a background worker because it was about
25
+ to go out of memory. In this case, please rerun whatever command you have been running and closely monitor your system
26
+ RAM (not GPU memory!) usage. If your RAM is full or close to full, you need to take action:
27
+ - reduce the number of background workers: use `-tl` and `-tf` in `nnUNet_plan_and_preprocess` (you may have to
28
+ go as low as 1!). Reduce the number of workers used by `nnUNet_predict` by reducing `--num_threads_preprocessing` and
29
+ `--num_threads_nifti_save`.
30
+ - If even `-tf 1` during preprocessing is not low enough, consider adding a swap partition located on an SSD.
31
+ - upgrade your RAM! (32 GB should get the job done)
32
+
33
+
34
+ ## nnU-Net training: RuntimeError: CUDA out of memory
35
+
36
+ This section is dealing with error messages such as this:
37
+
38
+ ```
39
+ RuntimeError: CUDA out of memory. Tried to allocate 4.16 GiB (GPU 0; 10.76 GiB total capacity; 2.82 GiB already allocated; 4.18 GiB free; 4.33 GiB reserved in total by PyTorch)
40
+ ```
41
+
42
+ This message appears when the GPU memory is insufficient. For most datasets, nnU-Net uses about 8GB of video memory.
43
+ To ensure that you can run all trainings, we recommend to use a GPU with at least 11GB (this will have some headroom).
44
+ If you are running other programs on the GPU you intend to train on (for example the GUI of your operating system),
45
+ the amount of VRAM available to nnU-Net is less than whatever is on your GPU. Please close all unnecessary programs or
46
+ invest in a second GPU. We for example like to use a low cost GPU (GTX 1050 or slower) for the display outputs while
47
+ having the 2080 ti (or equivalent) handle the training.
48
+
49
+ At the start of each training, cuDNN will run some benchmarks in order to figure out the fastest convolution algorithm
50
+ for the current network architecture (we use `torch.backends.cudnn.benchmark=True`). VRAM consumption will jump all over
51
+ the place while these benchmarks run and can briefly exceed the 8GB nnU-Net typically requires. If you keep running into
52
+ `RuntimeError: CUDA out of memory` problems you may want to consider disabling that. You can do so by setting the
53
+ `--deterministic` flag when using `nnUNet_train`. Setting this flag can slow down your training, so it is recommended
54
+ to only use it if necessary.
55
+
56
+ ## nnU-Net training in Docker container: RuntimeError: unable to write to file </torch_781_2606105346>
57
+
58
+ Nvidia NGC (https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) is a great place to find Docker containers with
59
+ the most recent software (pytorch, cuDNN, etc.) in them. When starting Docker containers with the command provided on the
60
+ Nvidia website, the docker will crash with errors like this when running nnU-Net: `RuntimeError: unable to write to
61
+ file </torch_781_2606105346>`. Please start the docker with the `--ipc=host` flag to solve this.
62
+
63
+ ## Downloading pretrained models: unzip: cannot find zipfile directory in one of /home/isensee/.nnunetdownload_16031094034174126
64
+
65
+ Sometimes downloading the large zip files containing our pretrained models can fail and cause the error above. Please
66
+ make sure to use the most recent nnU-Net version (we constantly try to improve the downloading). If that does not fix it
67
+ you can always download the zip file from our zenodo (https://zenodo.org/record/4003545) and use the
68
+ `nnUNet_install_pretrained_model_from_zip` command to install the model.
69
+
70
+ ## Downloading pre-trained models: `unzip: 'unzip' is not recognized as an internal or external command` OR `Command 'unzip' not found`
71
+
72
+ On Windows systems and on a bare WSL2 system, the `unzip` command may not be present.
73
+ Either install it, unzip the pre-trained model from the zenodo download manually, or update to a newer version of nnU-Net that uses the Python built-in
74
+ zipfile module (https://docs.python.org/3/library/zipfile.html).
75
+
76
+ ## nnU-Net training (2D U-Net): High (and increasing) system RAM usage, OOM
77
+
78
+ There was an issue with mixed precision causing a system RAM memory leak. This is fixed when using cuDNN 8.0.2 or newer,
79
+ but the current pytorch master comes with cuDNN 7.6.5. If you encounter this problem, please consider using Nvidias NGC
80
+ pytorch container for training (the pytorch it comes with has a recent cuDNN version). You can also install the new
81
+ cuDNN version on your system and compile pytorch yourself (instructions on the pytorch website!). This is what we do at DKFZ.
82
+
83
+
84
+ ## nnU-Net training of cascade: Error `seg from prev stage missing`
85
+ You need to run all five folds of `3d_lowres`. Segmentations of the previous stage can only be generated from the
86
+ validation set, otherwise we would overfit.
87
+
88
+ ## nnU-Net training: `RuntimeError: CUDA error: device-side assert triggered`
89
+ This error often goes along with something like `void THCudaTensor_scatterFillKernel(TensorInfo<Real, IndexType>,
90
+ TensorInfo<long, IndexType>, Real, int, IndexType) [with IndexType = unsigned int, Real = float, Dims = -1]:
91
+ block: [4770,0,0], thread: [374,0,0] Assertion indexValue >= 0 && indexValue < tensor.sizes[dim] failed.`.
92
+
93
+ This means that your dataset contains unexpected values in the segmentations. nnU-Net expects all labels to be
94
+ consecutive integers. So if your dataset has 4 classes (background and three foreground labels), then the labels
95
+ must be 0, 1, 2, 3 (where 0 must be background!). There cannot be any other values in the ground truth segmentations.
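+ 
+ A quick way to inspect the label values yourself (a minimal sketch; the file name is a placeholder):
+ 
+ ```python
+ import numpy as np
+ import SimpleITK as sitk
+ 
+ seg = sitk.GetArrayFromImage(sitk.ReadImage('labelsTr/case_001.nii.gz'))
+ print(np.unique(seg))  # must print consecutive integers starting at 0, e.g. [0 1 2 3]
+ ```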
96
+
97
+ If you run `nnUNet_plan_and_preprocess` with the `--verify_dataset_integrity` option, this should never happen because
98
+ it will check for wrong values in the label images.
99
+
100
+ ## nnU-Net training: Error: mmap length is greater than file size and EOFError
101
+ Please delete all .npy files in the nnUNet_preprocessed folder of the task you were trying to train. Then try again.
102
+
103
+ ## running nnU-Net on Azure instances
104
+ see https://github.com/MIC-DKFZ/nnUNet/issues/437, thank you @Alaska47
documentation/common_questions.md ADDED
@@ -0,0 +1,201 @@
1
+ # FAQ
2
+
3
+ ## Where can I find the segmentation metrics of my experiments?
4
+ **Results for the validation sets of each fold** are stored in the respective output folder after the training is completed. For example, this could be
5
+ `${RESULTS_FOLDER}/nnUNet/3d_fullres/Task003_Liver/nnUNetTrainerV2__nnUNetPlansv2.1/fold_0`. After training there will
6
+ be a `validation_raw` subfolder and a `validation_raw_postprocessed` subfolder. In each of these folders is going to
7
+ be a `summary.json` file with the segmentation metrics. There are metrics for each individual validation case and then
8
+ at the bottom there is also a mean across all cases.
9
+
10
+ **Cross-validation metrics** can only be computed after all five folds were run. You first need to run
11
+ `nnUNet_determine_postprocessing` (see `nnUNet_determine_postprocessing -h` for help). This will collect the
12
+ predictions from the validation sets of the five folds, compute metrics on them and then determine the postprocessing.
13
+ Once this is all done, there will be new folders located in the output directory (for example
14
+ `${RESULTS_FOLDER}/nnUNet/3d_fullres/Task003_Liver/nnUNetTrainerV2__nnUNetPlansv2.1/`): `cv_niftis_raw` (raw predictions
15
+ from the cross-validation) and `cv_niftis_postprocessed` (postprocessed predictions). In each of these folders is
16
+ going to be a `summary.json` file with the metrics (see above).
17
+
18
+ Note that the postprocessing determined on each individual fold is completely ignored by nnU-Net because it needs to
19
+ find a single postprocessing configuration for the whole cross-validation. The postprocessed results in each fold are
20
+ just for development purposes!
21
+
22
+ **Test set results** see [here](#evaluating-test-set-results).
23
+
24
+ **Ensemble performance** will be accessible here `${RESULTS_FOLDER}/nnUNet/ensembles/TASKNAME` after you ran
25
+ `nnUNet_find_best_configuration`. There are summary.csv for a quick overview and then there is also going to be
26
+ detailed results in the form of `summary.json` in the respective subfolders.
27
+
28
+ ## What postprocessing is selected?
29
+ After you run `nnUNet_determine_postprocessing` (see `nnUNet_determine_postprocessing -h` for help) there will be a
30
+ `postprocessing.json` file located in the output directory of your training (for example
31
+ `${RESULTS_FOLDER}/nnUNet/3d_fullres/Task003_Liver/nnUNetTrainerV2__nnUNetPlansv2.1/`). If you open this with a text
32
+ editor, there is a key "for_which_classes", followed by some list. For LiTS (classes 0: bg, 1: liver, 2: tumor)
33
+ this can for example be:
34
+ ```python
35
+ "for_which_classes": [
36
+ [
37
+ 1,
38
+ 2
39
+ ],
40
+ 1
+ ]
41
+ ```
42
+ This means that nnU-Net will first remove all but the largest components for the merged object consisting of classes
43
+ 1 and 2 (essentially the liver including the tumors) and then in a second step also remove all but the largest
44
+ connected component for the liver class.
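+ 
+ For example, a minimal sketch for inspecting the file (the path is a placeholder):
+ 
+ ```python
+ import json
+ 
+ with open('postprocessing.json') as f:
+     pp = json.load(f)
+ print(pp['for_which_classes'])  # e.g. [[1, 2], 1] for LiTS
+ ```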
45
+
46
+ Note that you do not have to run `nnUNet_determine_postprocessing` if you use `nnUNet_find_best_configuration`.
47
+ `nnUNet_find_best_configuration` will do that for you.
48
+
49
+ Ensemble results and postprocessing will be stored in `${RESULTS_FOLDER}/nnUNet/ensembles`
50
+ (this will all be generated by `nnUNet_find_best_configuration`).
51
+
52
+ ## Evaluating test set results
53
+ This feature was only added recently. Please run `pip install --upgrade nnunet` or reinstall nnunet from the master.
54
+
55
+ You can now use `nnUNet_evaluate_folder` to compute metrics on predicted test cases. For example:
56
+
57
+ ```
58
+ nnUNet_evaluate_folder -ref FOLDER_WITH_GT -pred FOLDER_WITH_PREDICTIONS -l 1 2 3 4
59
+ ```
60
+
61
+ This example is for a dataset that has 4 foreground classes (labels 1, 2, 3, 4). `FOLDER_WITH_GT` and
62
+ `FOLDER_WITH_PREDICTIONS` must contain files with the same names containing the reference and predicted segmentations
63
+ of each case, respectively. The files must be nifti (end with .nii.gz).
64
+
65
+ ## Creating and managing data splits
66
+
67
+ At the start of each training, nnU-Net will check whether the splits_final.pkl file is present in the directory where
68
+ the preprocessed data of the requested dataset is located. If the file is not present, nnU-Net will create its own
69
+ split: a five-fold cross-validation using all the available training cases. nnU-Net needs this five-fold
70
+ cross-validation to be able to determine the postprocessing and to run model/ensemble selection.
71
+
72
+ There are however situations in which you may want to create your own split, for example
73
+ - in datasets like ACDC where several training cases are connected (there are two time steps for each patient) you
74
+ may need to manually create splits to ensure proper stratification.
75
+ - cases are annotated by multiple annotators and you would like to use the annotations as separate training examples
76
+ - if you are running experiments with a domain transfer, you might want to train only on cases from domain A and
77
+ validate on domain B
78
+ - ...
79
+
80
+ Creating your own data split is simple: the splits_final.pkl file contains the following data structure (assume there are five training cases A, B, C, D, and E):
81
+ ```python
82
+ splits = [
83
+ {'train': ['A', 'B', 'C', 'D'], 'val': ['E']},
84
+ {'train': ['A', 'B', 'C', 'E'], 'val': ['D']},
85
+ {'train': ['A', 'B', 'D', 'E'], 'val': ['C']},
86
+ {'train': ['A', 'C', 'D', 'E'], 'val': ['B']},
87
+ {'train': ['B', 'C', 'D', 'E'], 'val': ['A']}
88
+ ]
89
+ ```
90
+
91
+ Use load_pickle and save_pickle from batchgenerators.utilities.file_and_folder_operations for loading/storing the splits.
92
+
93
+ Splits is a list of length NUMBER_OF_FOLDS. Each entry in the list is a dict, with 'train' and 'val' as keys and lists
94
+ of the corresponding case names (without the _0000 etc!) as values.
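+ 
+ For example, a minimal sketch for writing a custom two-fold split (the case names are placeholders):
+ 
+ ```python
+ from batchgenerators.utilities.file_and_folder_operations import save_pickle
+ 
+ splits = [
+     {'train': ['caseA', 'caseB', 'caseC'], 'val': ['caseD']},
+     {'train': ['caseA', 'caseB', 'caseD'], 'val': ['caseC']},
+ ]
+ save_pickle(splits, 'splits_final.pkl')  # place this in the preprocessed folder of your task
+ ```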
95
+
96
+ nnU-Net's five-fold cross validation will always create a list of len(splits)=5. But you can do whatever you want. Note
97
+ that if you define only 4 splits (fold 0-3) and then set fold=4 when training (that would be the fifth split),
98
+ nnU-Net will print a warning and proceed to use a random 80:20 data split.
99
+
100
+ ## How can I swap component XXX (for example the loss) of nnU-Net?
101
+
102
+ All changes in nnU-Net are handled the same way:
103
+
104
+ 1) create a new nnU-Net trainer class. Place the file somewhere in the nnunet.training.network_training folder
105
+ (any subfolder will do. If you create a new subfolder, make sure to include an empty `__init__.py` file!)
106
+
107
+ 2) make your new trainer class derive from the trainer you would like to change (most likely this is going to be nnUNetTrainerV2)
108
+
109
+ 3) identify the function that you need to overwrite. You may have to go up the inheritance hierarchy to find it!
110
+
111
+ 4) overwrite that function in your custom trainer, make sure whatever you do is compatible with the rest of nnU-Net
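+ 
+ As a minimal sketch of these four steps, here is how a trainer that swaps the loss could look (the class name is made up; `nnUNetTrainerV2` and `DC_and_CE_loss` are part of nnU-Net, but treat the exact kwargs as an assumption):
+ 
+ ```python
+ from nnunet.training.network_training.nnUNetTrainerV2 import nnUNetTrainerV2
+ from nnunet.training.loss_functions.dice_loss import DC_and_CE_loss
+ 
+ 
+ class nnUNetTrainerV2_MyLoss(nnUNetTrainerV2):
+     def initialize(self, training=True, force_load_plans=False):
+         # let the default initialization build the network, optimizer, dataloaders, ...
+         super().initialize(training, force_load_plans)
+         # ... then swap in the loss you want to experiment with
+         self.loss = DC_and_CE_loss({'batch_dice': self.batch_dice, 'smooth': 1e-5, 'do_bg': False}, {})
+ ```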
112
+
113
+ What these changes need to look like specifically is hard to say without knowing what you are exactly trying to do.
114
+ Before you open a new issue on GitHub, please have a look around the `nnunet.training.network_training` folder first!
115
+ There are tons of examples modifying various parts of the pipeline.
116
+
117
+ Also see [here](extending_nnunet.md)
118
+
119
+ ## How does nnU-Net handle multi-modal images?
120
+
121
+ Multi-modal images are treated as color channels. BraTS, which comes with T1, T1c, T2 and Flair images for each
122
+ training case will thus for example have 4 input channels.
123
+
124
+ ## Why does nnU-Net not use all my GPU memory?
125
+
126
+ nnU-net and all its parameters are optimized for a training setting that uses about 8GB of VRAM for a network training.
127
+ Using more VRAM will not speed up the training. Using more VRAM has also not (yet) been beneficial for model
128
+ performance consistently enough to make that the default. If you really want to train with more VRAM, you can do one of these things:
129
+
130
+ 1) Manually edit the plans files to increase the batch size. A larger batch size gives better (less noisy) gradients
131
+ and may improve your model performance if the dataset is large. Note that nnU-Net always runs for 1000 epochs with 250
132
+ iterations each (250000 iterations). The training time thus scales approximately linearly with the batch size
133
+ (batch size 4 is going to need twice as long for training than batch size 2!)
134
+
135
+ 2) Manually edit the plans files to increase the patch size. This one is tricky and should only be attempted if you
136
+ know what you are doing! Again, training times will be increased if you do this! Option 3 below is a better way of increasing the
137
+ patch size.
138
+
139
+ 3) Run `nnUNet_plan_and_preprocess` with a larger GPU memory budget. This will make nnU-Net plan for larger patch sizes
140
+ during experiment planning. Doing this can change the patch size, network topology, the batch size as well as the
141
+ presence of the U-Net cascade. To run with a different memory budget, you need to specify a different experiment planner, for example
142
+ `nnUNet_plan_and_preprocess -t TASK_ID -pl2d None -pl3d ExperimentPlanner3D_v21_32GB` (note that `-pl2d None` will
143
+ disable 2D U-Net configuration. There is currently no planner for larger 2D U-Nets). We have planners for 8 GB (default),
144
+ 11GB and 32GB available. If you need a planner for a different GPU size, you should be able to quickly hack together
145
+ your own using the code of the 11GB or 32GB planner (same goes for a 2D planner). Note that we have experimented with
146
+ these planners and not found an increase in segmentation performance as a result of using them. Training times are
147
+ again longer than with the default.
148
+
149
+ ## Do I need to always run all U-Net configurations?
150
+ The model training pipeline above is for challenge participations. Depending on your task you may not want to train all
151
+ U-Net models and you may also not want to run a cross-validation all the time.
152
+ Here are some recommendations about what U-Net model to train:
153
+ - It is safe to say that on average, the 3D U-Net model (3d_fullres) was most robust. If you just want to use nnU-Net because you
154
+ need segmentations, I recommend you start with this.
155
+ - If you are not happy with the results from the 3D U-Net then you can try the following:
156
+ - if your cases are very large so that the patch size of the 3d U-Net only covers a very small fraction of an image then
157
+ it is possible that the 3d U-Net cannot capture sufficient contextual information in order to be effective. If this
158
+ is the case, you should consider running the 3d U-Net cascade (3d_lowres followed by 3d_cascade_fullres)
159
+ - If your data is very anisotropic then a 2D U-Net may actually be a better choice (Promise12, ACDC, Task05_Prostate
160
+ from the decathlon are examples for anisotropic data)
161
+
162
+ You do not have to run five-fold cross-validation all the time. If you want to test single model performance, use
163
+ *all* for `FOLD` instead of a number. Note that this will then not give you an estimate of your performance on the
164
+ training set. You will also not be able to automatically identify which ensembling should be used and nnU-Net will
165
+ not be able to configure a postprocessing.
166
+
167
+ CAREFUL: DO NOT use fold=all when you intend to run the cascade! You must run the cross-validation in 3d_lowres so
168
+ that you get proper (=not overfitted) low resolution predictions.
169
+
170
+ ## Sharing Models
171
+ You can share trained models by simply sending the corresponding output folder from `RESULTS_FOLDER/nnUNet` to
172
+ whoever you want share them with. The recipient can then use nnU-Net for inference with this model.
173
+
174
+ You can now also use `nnUNet_export_model_to_zip` to export a trained model (or models) to a zip file. The recipient
175
+ can then use `nnUNet_install_pretrained_model_from_zip` to install the model from this zip file.
176
+
177
+ ## Can I run nnU-Net on smaller GPUs?
178
+ nnU-Net is guaranteed to run on GPUs with 11GB of memory. Many configurations may also run on 8 GB.
179
+ If you have an 11GB and there is still an `Out of Memory` error, please read 'nnU-Net training: RuntimeError: CUDA out of memory' [here](common_problems_and_solutions.md).
180
+
181
+ If you wish to configure nnU-Net to use a different amount of GPU memory, simply adapt the reference value for the GPU memory estimation
182
+ accordingly (with some slack because the whole thing is not an exact science!). For example, in
183
+ [experiment_planner_baseline_3DUNet_v21_11GB.py](nnunet/experiment_planning/experiment_planner_baseline_3DUNet_v21_11GB.py)
184
+ we provide an example that attempts to maximise the usage of GPU memory on 11GB (as opposed to the default, which leaves
185
+ much more headroom). This is simply achieved by this line:
186
+
187
+ ```python
188
+ ref = Generic_UNet.use_this_for_batch_size_computation_3D * 11 / 8
189
+ ```
190
+
191
+ with 8 being what is currently used (approximately) and 11 being the target. Should you get CUDA out of memory
192
+ issues, simply reduce the reference value. You should do this adaptation as part of a separate ExperimentPlanner class.
193
+ Please read the instructions [here](extending_nnunet.md).
194
+
195
+
196
+ ## Why is no 3d_lowres model created?
197
+ 3d_lowres is created only if the patch size in 3d_fullres covers less than 1/8 of the voxels of the median shape of the data
198
+ in 3d_fullres (for example Liver is about 512x512x512 and the patch size is 128x128x128, so that's 1/64 and thus
199
+ 3d_lowres is created). You can enforce the creation of 3d_lowres models for smaller datasets by changing the value of
200
+ `HOW_MUCH_OF_A_PATIENT_MUST_THE_NETWORK_SEE_AT_STAGE0` (located in experiment_planning.configuration).
201
+
documentation/data_format_inference.md ADDED
@@ -0,0 +1,34 @@
1
+ # Data format for Inference
2
+
3
+ The data format for inference must match the one used for the raw data (specifically, the images must be in exactly
4
+ the same format as in the imagesTr folder). As before, the filenames must start with a
5
+ unique identifier, followed by a 4-digit modality identifier. Here is an example for two different datasets:
6
+
7
+ 1) Task005_Prostate:
8
+
9
+ This task has 2 modalities, so the files in the input folder must look like this:
10
+
11
+ input_folder
12
+ ├── prostate_03_0000.nii.gz
13
+ ├── prostate_03_0001.nii.gz
14
+ ├── prostate_05_0000.nii.gz
15
+ ├── prostate_05_0001.nii.gz
16
+ ├── prostate_08_0000.nii.gz
17
+ ├── prostate_08_0001.nii.gz
18
+ ├── ...
19
+
20
+ _0000 is always the T2 image and _0001 is always the ADC image (as specified by 'modality' in the dataset.json)
21
+
22
+ 2) Task002_Heart:
23
+
24
+ imagesTs
25
+ ├── la_001_0000.nii.gz
26
+ ├── la_002_0000.nii.gz
27
+ ├── la_006_0000.nii.gz
28
+ ├── ...
29
+
30
+ Task002 only has one modality, so each case only has one _0000.nii.gz file.
31
+
32
+
33
+ The segmentations in the output folder will be named IDENTIFIER.nii.gz (omitting the modality identifier).
34
+
documentation/dataset_conversion.md ADDED
@@ -0,0 +1,213 @@
1
+ # Dataset conversion instructions
2
+ nnU-Net requires the raw data to be brought into a specific format so that it knows how to read and interpret it. This
3
+ format closely, but not entirely, follows the format used by the
4
+ [Medical Segmentation Decathlon](http://medicaldecathlon.com/) (MSD).
5
+
6
+ The entry point to nnU-Net is the nnUNet_raw_data_base folder (which the user needs to specify when installing nnU-Net!).
7
+ Each segmentation dataset is stored as a separate 'Task'. Tasks are associated with a task ID, a three digit integer
8
+ (this is different from the MSD!) and
9
+ a task name (which you can freely choose): Task005_Prostate has 'Prostate' as task name and the task id is 5. Tasks are stored in the
10
+ nnUNet_raw_data_base/nnUNet_raw_data folder like this:
11
+
12
+ nnUNet_raw_data_base/nnUNet_raw_data/
13
+ ├── Task001_BrainTumour
14
+ ├── Task002_Heart
15
+ ├── Task003_Liver
16
+ ├── Task004_Hippocampus
17
+ ├── Task005_Prostate
18
+ ├── ...
19
+
20
+ Within each task folder, the following structure is expected:
21
+
22
+ Task001_BrainTumour/
23
+ ├── dataset.json
24
+ ├── imagesTr
25
+ ├── (imagesTs)
26
+ └── labelsTr
27
+
28
+ **Please make your custom task ids start at 500 to ensure that there will be no conflicts with downloaded pretrained models!!! (IDs cannot exceed 999)**
29
+
30
+ imagesTr contains the images belonging to the training cases. nnU-Net will run pipeline configuration, training with
31
+ cross-validation, as well as finding postprocessing and the best ensemble on this data. imagesTs (optional) contains the
32
+ images that belong to the
33
+ test cases, labelsTr the images with the ground truth segmentation maps for the training cases. dataset.json contains
34
+ metadata of the dataset.
35
+
36
+ Each training case is associated with an identifier = a unique name for that case. This identifier is used by nnU-Net to
37
+ recognize which label file belongs to which image. **All images (including labels) must be 3D nifti files (.nii.gz)!**
38
+
39
+ The image files can have any scalar pixel type. The label files must contain segmentation maps that contain consecutive integers,
40
+ starting with 0: 0, 1, 2, 3, ... num_labels. 0 is considered background. Each class then has its own associated integer
41
+ value.
42
+ Images may have multiple modalities. This is especially often the case for medical images. Modalities are very much
43
+ like color channels in photos (three color channels: red, green, blue), but can be much more diverse: CT, different types
44
+ of MRI, and many others. Imaging modalities are identified by nnU-Net by their suffix: a four-digit integer at the end
45
+ of the filename. Imaging files must therefore follow the following naming convention: case_identifier_XXXX.nii.gz.
46
+ Hereby, XXXX is the modality identifier. What modalities these identifiers belong to is specified in the dataset.json
47
+ file (see below). Label files are saved as case_identifier.nii.gz
48
+
49
+ This naming scheme results in the following folder structure. It is the responsibility of the user to bring their
50
+ data into this format!
51
+
52
+ Here is an example for the first Task of the MSD: BrainTumour. Each image has four modalities: FLAIR (0000),
53
+ T1w (0001), T1gd (0002) and T2w (0003). Note that the imagesTs folder is optional and does not have to be present.
54
+
55
+ nnUNet_raw_data_base/nnUNet_raw_data/Task001_BrainTumour/
56
+ ├── dataset.json
57
+ ├── imagesTr
58
+ │   ├── BRATS_001_0000.nii.gz
59
+ │   ├── BRATS_001_0001.nii.gz
60
+ │   ├── BRATS_001_0002.nii.gz
61
+ │   ├── BRATS_001_0003.nii.gz
62
+ │   ├── BRATS_002_0000.nii.gz
63
+ │   ├── BRATS_002_0001.nii.gz
64
+ │   ├── BRATS_002_0002.nii.gz
65
+ │   ├── BRATS_002_0003.nii.gz
66
+ │   ├── BRATS_003_0000.nii.gz
67
+ │   ├── BRATS_003_0001.nii.gz
68
+ │   ├── BRATS_003_0002.nii.gz
69
+ │   ├── BRATS_003_0003.nii.gz
70
+ │   ├── BRATS_004_0000.nii.gz
71
+ │   ├── BRATS_004_0001.nii.gz
72
+ │   ├── BRATS_004_0002.nii.gz
73
+ │   ├── BRATS_004_0003.nii.gz
74
+ │   ├── ...
75
+ ├── imagesTs
76
+ │   ├── BRATS_485_0000.nii.gz
77
+ │   ├── BRATS_485_0001.nii.gz
78
+ │   ├── BRATS_485_0002.nii.gz
79
+ │   ├── BRATS_485_0003.nii.gz
80
+ │   ├── BRATS_486_0000.nii.gz
81
+ │   ├── BRATS_486_0001.nii.gz
82
+ │   ├── BRATS_486_0002.nii.gz
83
+ │   ├── BRATS_486_0003.nii.gz
84
+ │   ├── BRATS_487_0000.nii.gz
85
+ │   ├── BRATS_487_0001.nii.gz
86
+ │   ├── BRATS_487_0002.nii.gz
87
+ │   ├── BRATS_487_0003.nii.gz
88
+ │   ├── BRATS_488_0000.nii.gz
89
+ │   ├── BRATS_488_0001.nii.gz
90
+ │   ├── BRATS_488_0002.nii.gz
91
+ │   ├── BRATS_488_0003.nii.gz
92
+ │   ├── BRATS_489_0000.nii.gz
93
+ │   ├── BRATS_489_0001.nii.gz
94
+ │   ├── BRATS_489_0002.nii.gz
95
+ │   ├── BRATS_489_0003.nii.gz
96
+ │   ├── ...
97
+ └── labelsTr
98
+ ├── BRATS_001.nii.gz
99
+ ├── BRATS_002.nii.gz
100
+ ├── BRATS_003.nii.gz
101
+ ├── BRATS_004.nii.gz
102
+ ├── ...
103
+
104
+ Here is another example of the second task of the MSD, which has only one modality:
105
+
106
+ nnUNet_raw_data_base/nnUNet_raw_data/Task002_Heart/
107
+ ├── dataset.json
108
+ ├── imagesTr
109
+ │   ├── la_003_0000.nii.gz
110
+ │   ├── la_004_0000.nii.gz
111
+ │   ├── ...
112
+ ├── imagesTs
113
+ │   ├── la_001_0000.nii.gz
114
+ │   ├── la_002_0000.nii.gz
115
+ │   ├── ...
116
+ └── labelsTr
117
+ ├── la_003.nii.gz
118
+ ├── la_004.nii.gz
119
+ ├── ...
120
+
121
+ For each training case, all images must have the same geometry to ensure that their pixel arrays are aligned. Also
122
+ make sure that all your data is co-registered!
123
+
124
+ The dataset.json file used by nnU-Net is identical to the ones used by the MSD. For your custom tasks you need to create
125
+ them as well and thereby exactly follow the same structure. [This](https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2)
126
+ is where you can download the MSD data for reference.
127
+
128
+ **NEW:** There now is a utility with which you can generate the dataset.json automatically. You can find it
129
+ [here](../nnunet/dataset_conversion/utils.py) (look for the function `generate_dataset_json`).
130
+ See [Task120](../nnunet/dataset_conversion/Task120_Massachusetts_RoadSegm.py) for an example on how to use it. And read
131
+ its documentation!
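+ 
+ As a rough sketch (paths and values are placeholders; check the function's docstring for the authoritative signature):
+ 
+ ```python
+ from nnunet.dataset_conversion.utils import generate_dataset_json
+ 
+ generate_dataset_json('/path/to/Task555_MyTask/dataset.json',   # output file
+                       '/path/to/Task555_MyTask/imagesTr',       # imagesTr directory
+                       None,                                     # imagesTs directory (None if absent)
+                       ('T2', 'ADC'),                            # modalities, one entry per _XXXX suffix
+                       {0: 'background', 1: 'PZ', 2: 'TZ'},      # labels
+                       'Task555_MyTask')                         # dataset name
+ ```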
132
+
133
+ Here is the content of the dataset.json from the Prostate task:
134
+
135
+ {
136
+ "name": "PROSTATE",
137
+ "description": "Prostate transitional zone and peripheral zone segmentation",
138
+ "reference": "Radboud University, Nijmegen Medical Centre",
139
+ "licence":"CC-BY-SA 4.0",
140
+ "relase":"1.0 04/05/2018",
141
+ "tensorImageSize": "4D",
142
+ "modality": {
143
+ "0": "T2",
144
+ "1": "ADC"
145
+ },
146
+ "labels": {
147
+ "0": "background",
148
+ "1": "PZ",
149
+ "2": "TZ"
150
+ },
151
+ "numTraining": 32,
152
+ "numTest": 16,
153
+ "training":[{"image":"./imagesTr/prostate_16.nii.gz","label":"./labelsTr/prostate_16.nii.gz"},{"image":"./imagesTr/prostate_04.nii.gz","label":"./labelsTr/prostate_04.nii.gz"},...],
154
+ "test": ["./imagesTs/prostate_08.nii.gz","./imagesTs/prostate_22.nii.gz","./imagesTs/prostate_30.nii.gz",...]
155
+ }
156
+
157
+ Note that we truncated the "training" and "test" lists for clarity. You need to specify all the cases in there. If you
158
+ don't have test images (imagesTs does not exist) you can leave "test" blank: `"test": []`.
159
+
160
+ Please also have a look at the python files located [here](../nnunet/dataset_conversion). They show how we created our
161
+ custom dataset.jsons for a range of public datasets.
162
+
163
+ ## How to use decathlon datasets
164
+ The previous release of nnU-Net allowed users to either start with 4D or 3D niftis. This resulted in some confusion,
165
+ however, because some users would not know where they should save their data. We therefore dropped support for the 4D
166
+ niftis used by the MSD. Instead, we provide a utility that converts the MSD datasets into the format specified above:
167
+
168
+ ```bash
169
+ nnUNet_convert_decathlon_task -i FOLDER_TO_TASK_AS_DOWNLOADED_FROM_MSD -p NUM_PROCESSES
170
+ ```
171
+
172
+ FOLDER_TO_TASK_AS_DOWNLOADED_FROM_MSD needs to point to the downloaded task folder (such as Task05_Prostate, note the
173
+ 2-digit task id!). The converted Task will be saved under the same name in nnUNet_raw_data_base/nnUNet_raw_data
174
+ (but with a 3 digit identifier). You can overwrite the task id of the converted task by using the `-output_task_id` option.
175
+
176
+
177
+ ## How to use 2D data with nnU-Net
178
+ nnU-Net was originally built for 3D images. It is also strongest when applied to 3D segmentation problems because a
179
+ large proportion of its design choices were built with 3D in mind. Also note that many 2D segmentation problems,
180
+ especially in the non-biomedical domain, may benefit from pretrained network architectures which nnU-Net does not
181
+ support.
182
+ Still, there is certainly a need for an out of the box segmentation solution for 2D segmentation problems. And
183
+ also on 2D segmentation tasks nnU-Net cam perform extremely well! We have, for example, won a 2D task in the cell
184
+ tracking challenge with nnU-Net (see our Nature Methods paper) and we have also successfully applied nnU-Net to
185
+ histopathological segmentation problems.
186
+ Working with 2D data in nnU-Net requires a small workaround in the creation of the dataset. Essentially, all images
187
+ must be converted to pseudo 3D images (so an image with shape (X, Y) needs to be converted to an image with shape
188
+ (1, X, Y). The resulting image must be saved in nifti format. Hereby it is important to set the spacing of the
189
+ first axis (the one with shape 1) to a value larger than the others. If you are working with niftis anyways, then
190
+ doing this should be easy for you. This example here is intended for demonstrating how nnU-Net can be used with
191
+ 'regular' 2D images. We selected the massachusetts road segmentation dataset for this because it can be obtained
192
+ easily, it comes with a good amount of training cases but is still not too large to be difficult to handle.
193
+
194
+ See [here](../nnunet/dataset_conversion/Task120_Massachusetts_RoadSegm.py) for an example.
195
+ This script contains a lot of comments and useful information. Also have a look
196
+ [here](../nnunet/dataset_conversion/Task089_Fluo-N2DH-SIM.py).
197
+
198
+ ## How to update an existing dataset
199
+ When updating a dataset you not only need to change the data located in `nnUNet_raw_data_base/nnUNet_raw_data`. Make
200
+ sure to also delete the whole (!) corresponding dataset in `nnUNet_raw_data_base/nnUNet_cropped_data`. nnU-Net will not
201
+ repeat the cropping (and thus will not update your dataset) if the old files are still in nnUNet_cropped_data!
202
+
203
+ The best way of updating an existing dataset is (**choose one**):
204
+ - delete all data and models belonging to the old version of the dataset (nnUNet_preprocessed, corresponding results
205
+ in RESULTS_FOLDER/nnUNet, nnUNet_cropped_data, nnUNet_raw_data), then update
206
+ - (recommended) create the updated dataset from scratch using a new task ID **and** name
207
+
208
+
209
+ ## How to convert other image formats to nifti
210
+ Please have a look at the following tasks:
211
+ - [Task120](../nnunet/dataset_conversion/Task120_Massachusetts_RoadSegm.py): 2D png images
212
+ - [Task075](../nnunet/dataset_conversion/Task075_Fluo_C3DH_A549_ManAndSim.py) and [Task076](../nnunet/dataset_conversion/Task076_Fluo_N3DH_SIM.py): 3D tiff
213
+ - [Task089](../nnunet/dataset_conversion/Task089_Fluo-N2DH-SIM.py) 2D tiff
documentation/expected_epoch_times.md ADDED
@@ -0,0 +1,173 @@
1
+ # Introduction
2
+ Trainings can take some time. A well-running training setup is essential to get the most of nnU-Net. nnU-Net does not
3
+ require any fancy hardware, just a well-balanced system. We recommend at least 32 GB of RAM, 6 CPU cores (12 threads),
4
+ SSD storage (this can be SATA and does not have to be PCIe. DO NOT use an external SSD connected via USB!) and a
5
+ 2080 ti GPU. If your system has multiple GPUs, the
6
+ other components need to scale linearly with the number of GPUs.
7
+
8
+ # Benchmark Details
9
+ To ensure your system is running as intended, we provide some benchmark numbers against which you can compare. Here
10
+ are the details about benchmarking:
11
+
12
+ - We benchmark **2d**, **3d_fullres** and a modified 3d_fullres that uses 3x the default batch size (called **3d_fullres large** here)
13
+ - The datasets **Task002_Heart**, **Task005_Prostate** and **Task003_Liver** of the Medical Segmentation Decathlon are used
14
+ (they provide a good spectrum of dataset properties)
15
+ - we use the nnUNetTrainerV2_5epochs trainer. This will run only for 5 epochs and it will skip validation.
16
+ From the 5 epochs, we select the fastest one as the epoch time.
17
+ - We will also be running the nnUNetTrainerV2_5epochs_dummyLoad trainer on the 3d_fullres config (called **3d_fullres dummy**). This trainer does not use
18
+ the dataloader and instead uses random dummy inputs, bypassing all data augmentation (CPU) and I/O bottlenecks.
19
+ - All trainings are done with mixed precision. This is why Pascal GPUs (Titan Xp) are so slow (they do not have
20
+ tensor cores)
21
+
22
+ # How to run the benchmark
23
+ First go into the folder where the preprocessed data and plans file of the task you would like to use are located. For me this is
24
+ `/home/fabian/data/nnUNet_preprocessed/Task002_Heart`
25
+
26
+ Then run the following python snippet. This will create our custom **3d_fullres_large** configuration. Note that this
27
+ large configuration will only run on GPUs with 16GB or more! We included it in the test because some GPUs
28
+ (V100, and probably also A100) can shine when they get more work to do per iteration.
29
+ ```python
30
+ from batchgenerators.utilities.file_and_folder_operations import *
31
+ plans = load_pickle('nnUNetPlansv2.1_plans_3D.pkl')
32
+ stage = max(plans['plans_per_stage'].keys())
33
+ plans['plans_per_stage'][stage]['batch_size'] *= 3
34
+ save_pickle(plans, 'nnUNetPlansv2.1_bs3x_plans_3D.pkl')
35
+ ```
36
+
37
+ Now you can run the benchmarks. Each should only take a couple of minutes
38
+ ```bash
39
+ nnUNet_train 2d nnUNetTrainerV2_5epochs TASKID 0
40
+ nnUNet_train 3d_fullres nnUNetTrainerV2_5epochs TASKID 0
41
+ nnUNet_train 3d_fullres nnUNetTrainerV2_5epochs_dummyLoad TASKID 0
42
+ nnUNet_train 3d_fullres nnUNetTrainerV2_5epochs TASKID 0 -p nnUNetPlansv2.1_bs3x # optional, only for GPUs with more than 16GB of VRAM
43
+ ```
44
+
45
+ The time we are interested in is the epoch time. You can find it in the text output (stdout) or the log file
46
+ located in your `RESULTS_FOLDER`. Note that the trainers used here run for 5 epochs. Select the fastest time from your
47
+ output as your benchmark time.
48
+
49
+ # Results
50
+
51
+ The following table shows the results we are getting on our servers/workstations. We are using pytorch 1.7.1 that we
52
+ compiled ourselves using the instructions found [here](https://github.com/pytorch/pytorch#from-source). The cuDNN
53
+ version we used is 8.1.0.77. You should be seeing similar numbers when you
54
+ run the benchmark on your server/workstation. Note that fluctuations of a couple of seconds are normal!
55
+
56
+ IMPORTANT: Compiling pytorch from source is currently mandatory for best performance! Pytorch 1.8 does not have
57
+ working tensorcore acceleration for 3D convolutions when installed with pip or conda!
58
+
59
+ IMPORTANT: A100 and V100 are very fast with the newer cuDNN versions and need more CPU workers to prevent bottlenecks,
60
+ set the environment variable `nnUNet_n_proc_DA=XX`
61
+ to increase the number of data augmentation workers. Recommended: 20 for V100, 32 for A100. Datasets with many input
62
+ modalities (BraTS: 4) require A LOT of CPU and should be used with even larger values for `nnUNet_n_proc_DA`
63
+
64
+ ## Pytorch 1.7.1 compiled with cuDNN 8.1.0.77
65
+
66
+ | | A100 40GB (DGX A100) 400W | V100 32GB SXM3 (DGX2) 350W | V100 32GB PCIe 250W | Quadro RTX6000 24GB 260W | Titan RTX 24GB 280W | RTX 2080 ti 11GB 250W | Titan Xp 12GB 250W |
67
+ |-----------------------------------|---------------------------|----------------------------|---------------------|--------------------------|---------------------|-----------------------|--------------------|
68
+ | Task002_Heart 2d | 40.06 | 66.03 | 76.19 | 78.01 | 79.78 | 98.49 | 177.87 |
69
+ | Task002_Heart 3d_fullres | 51.17 | 85.96 | 99.29 | 110.47 | 112.34 | 148.36 | 504.93 |
70
+ | Task002_Heart 3d_fullres dummy | 48.53 | 79 | 89.66 | 105.16 | 105.56 | 138.4 | 501.64 |
71
+ | Task002_Heart 3d_fullres large | 118.5 | 220.45 | 251.25 | 322.28 | 300.96 | OOM | OOM |
72
+ | | | | | | | | |
73
+ | Task003_Liver 2d | 39.71 | 60.69 | 69.65 | 72.29 | 76.17 | 92.54 | 183.73 |
74
+ | Task003_Liver 3d_fullres | 44.48 | 75.53 | 87.19 | 85.18 | 86.17 | 106.76 | 290.87 |
75
+ | Task003_Liver 3d_fullres dummy | 41.1 | 70.96 | 80.1 | 79.43 | 79.43 | 101.54 | 289.03 |
76
+ | Task003_Liver 3d_fullres large | 115.33 | 213.27 | 250.09 | 261.54 | 266.66 | OOM | OOM |
77
+ | | | | | | | | |
78
+ | Task005_Prostate 2d | 42.21 | 68.88 | 80.46 | 83.62 | 81.59 | 102.81 | 183.68 |
79
+ | Task005_Prostate 3d_fullres | 47.19 | 76.33 | 85.4 | 100 | 102.05 | 132.82 | 415.45 |
80
+ | Task005_Prostate 3d_fullres dummy | 43.87 | 70.58 | 81.32 | 97.48 | 98.99 | 124.73 | 410.12 |
81
+ | Task005_Prostate 3d_fullres large | 117.31 | 209.12 | 234.28 | 277.14 | 284.35 | OOM | OOM |
82
+
83
+ # Troubleshooting
84
+ Your epoch times are substantially slower than ours? That's not good! This section will help you figure out what is
85
+ wrong. Note that each system is unique and we cannot help you find bottlenecks beyond providing the information
86
+ presented in this section!
87
+
88
+ ## First step: Make sure you have the right software!
89
+ In order to get maximum performance, you need to have pytorch compiled with a recent cuDNN version (8002 or newer is a must!).
90
+ Unfortunately the currently provided pip/conda installable pytorch versions have a bug which causes their performance
91
+ to be very low (see https://github.com/pytorch/pytorch/issues/57115 and https://github.com/pytorch/pytorch/issues/50153).
92
+ They are about 2x-3x slower than the numbers we report in the table above.
93
+ You need to have a pytorch version that was compiled from source to get maximum performance as shown in the table above.
94
+ The easiest way to get that is by using the [Nvidia pytorch Docker](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch).
95
+ If you cannot use docker, you will need to compile pytorch
96
+ yourself. For that, first download and install cuDNN from the [Nvidia homepage](https://developer.nvidia.com/cudnn), then follow the
97
+ [instructions on the pytorch website](https://github.com/pytorch/pytorch#from-source) on how to compile it.
98
+
99
+ If you compiled pytorch yourself, you can check for the correct cuDNN version by running:
100
+ ```bash
101
+ python -c 'import torch;print(torch.backends.cudnn.version())'
102
+ ```
103
+ If the output is `8002` or higher, then you are good to go. If not, you may have to take action. IMPORTANT: this
104
+ only applies to pytorch that was compiled from source. pip/conda installed pytorch will report a new cuDNN version
105
+ but still have poor performance due to the bug linked above.
106
+
107
+ ## Identifying the bottleneck
108
+ If the software is up to date and you are still experiencing problems, this is how you can figure out what is going on:
109
+
110
+ While a training is running, run `htop` and `watch -n 0.1 nvidia-smi` (depending on your region you may have to use
111
+ `0,1` instead). If you have physical access to the machine, also have a look at the LED indicating I/O activity.
112
+
113
+ Here is what you can read from that:
114
+ - `nvidia-smi` shows the GPU activity. `watch -n 0.1` makes this command refresh every 0.1s. This will allow you to
115
+ see your GPU in action. A well running training will have your GPU pegged at 90-100% with no drops in GPU utilization.
116
+ Your power should also be close to the maximum (for example `237W / 250 W`) at all times.
117
+ - `htop` gives you an overview of the CPU usage. nnU-Net uses 12 processes for data augmentation + one main process.
118
+ This means that up to 13 processes should be running simultaneously.
119
+ - the I/O LED indicates that your system is reading/writing data from/to your hard drive/SSD. Whenever this is
120
+ blinking your system is doing something with your HDD/SSD.
121
+
122
+ ### GPU bottleneck
123
+ If `nvidia-smi` is constantly showing 90-100% GPU utilization and the reported power draw is near the maximum, your
124
+ GPU is the bottleneck. This is great! That means that your other components are not slowing it down. Your epoch times
125
+ should be the same as ours reported above. If they are not, then you need to investigate your software stack (see cuDNN stuff above).
126
+
127
+ What can you do about it?
128
+ 1) There is nothing holding you back. Everything is fine!
129
+ 2) If you need faster training, consider upgrading your GPU. Performance numbers are above, feel free to use them for guidance.
130
+ 3) Think about whether you need more (slower) GPUs or fewer (faster) GPUs. Make sure to include server/workstation
131
+ costs into your calculations. Sometimes it is better to go with more (cheaper but slower) GPUs and run multiple trainings
132
+ in parallel.
133
+
134
+ ### CPU bottleneck
135
+ You can recognize a CPU bottleneck as follows:
136
+ 1) htop is consistently showing 10+ processes that are associated with your nnU-Net training
137
+ 2) nvidia-smi is reporting jumps of GPU activity with zeroes in between
138
+
139
+ What can you do about it?
140
+ 1) Depending on your single core performance, some datasets may require more than the default 12 processes for data
141
+ augmentation. The CPU requirements for DA increase roughly linearly with the number of input modalities. Most datasets
142
+ will train fine with far fewer than 12 (6 or even just 4). But datasets with, for example, 4 modalities may require more.
143
+ If you have more than 12 CPU threads available, set the environment variable `nnUNet_n_proc_DA` to a number higher than 12.
144
+ 2) If your CPU has less than 12 threads in total, running 12 threads can overburden it. Try lowering `nnUNet_n_proc_DA`
145
+ to the number of threads you have available.
146
+ 3) (sounds stupid, but this is the only other way) upgrade your CPU. I have seen servers with 8 CPU cores (16 threads)
147
+ and 8 GPUs in them. That is not well balanced. CPUs are cheap compared to GPUs. On a 'workstation' (single or dual GPU)
148
+ you can get something like a Ryzen 3900X or 3950X. On a server you could consider Xeon 6226R or 6258R on the Intel
149
+ side or the EPYC 7302P, 7402P, 7502P or 7702P on the AMD side. Make sure to scale the number of cores according to your
150
+ number of GPUs and use case. Feel free to also use our nnU-Net recommendations from above.
151
+
152
+ ### I/O bottleneck
153
+ On a workstation, I/O bottlenecks can be identified by looking at the LED indicating I/O activity. This is what an
154
+ I/O bottleneck looks like:
155
+ - nvidia-smi is reporting jumps of GPU activity with zeroes in between
156
+ - htop is not showing many active CPU processes
157
+ - I/O LED is blinking rapidly or turned on constantly
158
+
159
+ Detecting I/O bottlenecks is difficult on servers where you may not have physical access. Tools like `iotop` are
160
+ difficult to read and can only be run with sudo. However, an I/O LED is not strictly necessary for the diagnosis. If
161
+ - nvidia-smi is reporting jumps of GPU activity with zeroes in between
162
+ - htop is not showing many active CPU processes
163
+
164
+ then the only possible issue to my knowledge is in fact an I/O bottleneck.
165
+
166
+ Here is what you can do about an I/O bottleneck:
167
+ 1) Make sure you are actually using an SSD to store the preprocessed data (`nnUNet_preprocessed`). Do not use an
168
+ SSD connected via USB! Never use an HDD. Do not use a network drive that was not specifically designed to handle fast I/O
169
+ (Note that you can use a network drive if it was designed for this purpose. At the DKFZ we use a
170
+ [flashblade](https://www.purestorage.com/products/file-and-object/flashblade.html) connected via ethernet and that works
171
+ great)
172
+ 2) A SATA SSD is only enough to feed 1-2 GPUs. If you have more GPUs installed, you may have to upgrade to an NVMe
173
+ drive (make sure to get PCIe interface!).
documentation/extending_nnunet.md ADDED
@@ -0,0 +1,119 @@
1
+
2
+ # Extending/Changing nnU-Net
3
+
4
+ To use nnU-Net as a framework and make changes to its components, please make sure to install it with the `git clone`
5
+ and `pip install -e .` commands so that a local copy of the code is created.
6
+ Changing components of nnU-Net needs to be done in different places, depending on whether these components belong to
7
+ the inferred, blueprint or empirical parameters. We cover some of the most common use cases below. They should give
8
+ you a good indication of where to start.
9
+
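+ For reference, a typical editable install (assuming you are cloning the official nnU-Net GitHub repository) looks like this:
+
+ ```bash
+ git clone https://github.com/MIC-DKFZ/nnUNet.git
+ cd nnUNet
+ pip install -e .
+ ```
+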
10
+ Generally it is recommended to look into the code where the thing you would like to change is currently implemented
11
+ and then derive a strategy on how to change it. If you have any questions, feel free to open an issue on GitHub and
12
+ we will help you as much as we can.
13
+
14
+ ## Changes to blueprint parameters
15
+ This section gives guidance on how to implement changes to loss function, training schedule, learning rates, optimizer,
16
+ some architecture parameters, data augmentation etc. All these parameters are part of the **nnU-Net trainer class**,
17
+ which we have already seen in the sections above. The default trainer class for 2D, 3D low resolution and 3D full
18
+ resolution U-Net is nnUNetTrainerV2, the default for the 3D full resolution U-Net from the cascade is
19
+ nnUNetTrainerV2CascadeFullRes. Trainer classes in nnU-Net inherit from each other; nnUNetTrainerV2CascadeFullRes for
20
+ example has nnUNetTrainerV2 as parent class and only overrides cascade-specific code.
21
+
22
+ Due to the inheritance of trainer classes, changes can be integrated into nnU-Net quite easily and with minimal effort.
23
+ Simply create a new trainer class (with some custom name), change the functionality you need to change and then specify
24
+ this class (via its name) during training - done.
25
+
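+ As a minimal sketch (the class name, file location and learning rate value are made up for illustration; `initial_lr` is an existing attribute of nnUNetTrainerV2):
+
+ ```python
+ # saved e.g. as nnunet/training/network_training/my_trainers/nnUNetTrainerV2_lr1en3.py
+ from nnunet.training.network_training.nnUNetTrainerV2 import nnUNetTrainerV2
+
+
+ class nnUNetTrainerV2_lr1en3(nnUNetTrainerV2):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.initial_lr = 1e-3  # override the default initial learning rate
+ ```
+
+ You would then run `nnUNet_train 3d_fullres nnUNetTrainerV2_lr1en3 TASK FOLD` to train with it.
+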
26
+ This process requires the new class to be located in a subfolder of nnunet.training.network_training! Do not save it
27
+ somewhere else or nnU-Net will not be able to find it! Also don't use the same name twice! nnU-Net always picks the
28
+ first trainer that matches the requested name.
29
+
30
+ Don't worry about overwriting results of another trainer class. nnU-Net always generates output folders that are named
31
+ after the trainer class used to generate the results.
32
+
33
+ Due to the variety of possible changes to the blueprint parameters of nnU-Net, we here only present a summary of where
34
+ to look for what kind of modification. During method development we have already created a large number of nnU-Net
35
+ blueprint variations which should give a good indication of where to start:
36
+
37
+ | Type of modification | Examples |
38
+ |-------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
39
+ | loss function | nnunet.training.network_training.loss_function.* |
40
+ | data augmentation | nnunet.training.network_training.data_augmentation.* |
41
+ | Optimizer, lr, momentum | nnunet.training.network_training.optimizer_and_lr.* |
42
+ | (Batch)Normalization | nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_BN.py<br>nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_FRN.py<br>nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_GN.py<br>nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_NoNormalization_lr1en3.py |
43
+ | Nonlinearity | nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_ReLU.py<br>nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_Mish.py |
44
+ | Architecture | nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_3ConvPerStage.py<br>nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_ResencUNet |
45
+ | ... | (see nnunet.training.network_training and subfolders) |
46
+
47
+ ## Changes to Inferred Parameters
48
+ The inferred parameters are determined based on the dataset fingerprint, a low dimensional representation of the properties
49
+ of the training cases. It captures, for example, the image shapes, voxel spacings and intensity information from
50
+ the training cases. The dataset fingerprint is created by the DatasetAnalyzer (which is located in nnunet.preprocessing)
51
+ while running `nnUNet_plan_and_preprocess`.
52
+
53
+ `nnUNet_plan_and_preprocess` uses so called ExperimentPlanners for running the adaptation process. Default ExperimentPlanner
54
+ classes are ExperimentPlanner2D_v21 for the 2D U-Net and ExperimentPlanner3D_v21 for the 3D full resolution U-Net and the
55
+ U-Net cascade. Just like nnUNetTrainers, the ExperimentPlanners inherit from each other, resulting in minimal programming
56
+ effort to incorporate changes. Just like with the trainers, simply give your custom ExperimentPlanners a unique name and
57
+ save them in some subfolder of nnunet.experiment_planning. You can then specify your class names when running
58
+ `nnUNet_plan_and_preprocess` and nnU-Net will find them automatically. When inheriting from ExperimentPlanners, you **MUST**
59
+ overwrite the class variables `self.data_identifier` and `self.plans_fname` (just like for example
60
+ [here](../nnunet/experiment_planning/alternative_experiment_planning/normalization/experiment_planner_3DUNet_CT2.py)).
61
+ If you omit this step the planner will overwrite the plans file and the preprocessed data of the planner it inherits from.
62
+
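+ A minimal sketch (the class name, identifier and file name are made up; parent class and constructor signature follow nnU-Net):
+
+ ```python
+ # saved e.g. as nnunet/experiment_planning/my_planners/experiment_planner_3DUNet_custom.py
+ from batchgenerators.utilities.file_and_folder_operations import join
+ from nnunet.experiment_planning.experiment_planner_baseline_3DUNet_v21 import ExperimentPlanner3D_v21
+
+
+ class ExperimentPlanner3D_v21_custom(ExperimentPlanner3D_v21):
+     def __init__(self, folder_with_cropped_data, preprocessed_output_folder):
+         super().__init__(folder_with_cropped_data, preprocessed_output_folder)
+         # unique identifiers so we do not overwrite the plans and preprocessed data of the parent
+         self.data_identifier = "nnUNetData_custom"
+         self.plans_fname = join(self.preprocessed_output_folder, "nnUNetPlans_custom_plans_3D.pkl")
+ ```
+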
63
+ To train with your custom configuration, simply specify the correct plans identifier with `-p` when you call the
64
+ `nnUNet_train` command. The plans file also contains the data_identifier specified in your ExperimentPlanner, so the
65
+ trainer class will automatically know what data should be used.
66
+
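+ For the sketch above, this would be (the plans identifier is the plans file name without the `_plans_3D.pkl` suffix):
+
+ ```bash
+ nnUNet_train 3d_fullres nnUNetTrainerV2 TASK FOLD -p nnUNetPlans_custom
+ ```
+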
67
+ Possible adaptations to the inferred parameters could include a different way of prioritizing batch size vs patch size
68
+ (currently, nnU-Net prioritizes patch size), a different handling of the spacing information for architecture template
69
+ instantiation, changing the definition of target spacing, or using different strategies for finding the 3d low
70
+ resolution U-Net configuration.
71
+
72
+ The folders located in nnunet.experiment_planning contain several example ExperimentPlanners that modify various aspects
73
+ of the inferred parameters. You can use them as inspiration for your own.
74
+
75
+ If you wish to run a different preprocessing, you most likely will have to implement your own Preprocessor class.
76
+ The preprocessor class that is used by some ExperimentPlanner is specified in its preprocessor_name class variable. The
77
+ default is `self.preprocessor_name = "GenericPreprocessor"` for 3D and `PreprocessorFor2D` for 2D (the 2D preprocessor
78
+ ignores the target spacing for the first axis to ensure that images are only resampled in the axes that will make up the training samples).
79
+ GenericPreprocessor (and all custom Preprocessors you implement) must be located in nnunet.preprocessing. The
80
+ preprocessor_name is saved in the plans file (by ExperimentPlanner), so that the
81
+ nnUNetTrainer knows which preprocessor must be used during inference to match the preprocessing of the training data.
82
+
83
+ Modifications to the preprocessing pipeline could be the addition of bias field correction to MRI images, a different CT
84
+ preprocessing scheme or a different way of resampling segmentations and image data for anisotropic cases.
85
+ An example is provided [here](../nnunet/preprocessing/preprocessing.py).
86
+
87
+ When implementing a custom preprocessor, you should also create a custom ExperimentPlanner that uses it (via self.preprocessor_name).
88
+ This experiment planner must also use a matching data_identifier and plans_fname to ensure no other data is overwritten.
89
+
90
+ ## Use a different network architecture
91
+ Changing the network architecture in nnU-Net is easy, but not self-explanatory. Any new segmentation network you implement
92
+ needs to understand what nnU-Net requests from it (wrt how many downsampling operations are done, whether deep supervision
93
+ is used, what the convolutional kernel sizes are supposed to be). It needs to be able to dynamiccaly change its topology,
94
+ just like our implementation of the [Generic_UNet](../nnunet/network_architecture/generic_UNet.py). Furthermore, it must be
95
+ able to generate a value that can be used to estimate memory consumption. What we have implemented for Generic_UNet effectively
96
+ counts the number of voxels found in all feature maps that are present in a given configuration. Although this estimation
97
+ disregards the number of parameters, we have found it to work quite well. Unless you implement an architecture with an
98
+ unreasonably high number of parameters, the large majority of the VRAM used during training will be occupied by feature
99
+ maps, so parameters can be (mostly) disregarded. For implementing your own network, it is key to understand that the
100
+ number we are computing here cannot be interpreted directly as memory consumption (factors other than the feature maps
101
+ of the convolutions also play a role, such as instance normalization. This is furthermore very hard to predict because
102
+ there are also several different algorithms for running the convolutions, each with its own memory requirement. We train
103
+ models with cudnn.benchmark=True, so it is impossible to predict which algorithm is used).
104
+ So instead, to approach this problem in the most straightforward way, we manually identify the largest configuration we
105
+ can fit in the GPU of choice (manually define the downsampling, patch size etc.) and use this value (-10% or so to be safe)
106
+ as **reference** in the ExperimentPlanner that uses this architecture.
107
+
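+ As a toy illustration of this voxel-counting proxy (this is not nnU-Net's actual code; the function name and the simple doubling/halving scheme are assumptions for illustration):
+
+ ```python
+ import numpy as np
+
+ def approx_feature_map_voxels(patch_size, base_features=32, num_pool=5, max_features=320):
+     """Sum the voxel count over all encoder feature maps of a plain U-Net."""
+     total, shape, features = 0, np.array(patch_size, dtype=np.int64), base_features
+     for _ in range(num_pool + 1):
+         total += features * int(np.prod(shape))
+         shape = shape // 2                          # each pooling halves every axis
+         features = min(features * 2, max_features)  # feature maps double, capped
+     return total
+ ```
+
+ Comparing such a number for a candidate configuration against the manually determined reference is exactly what the ExperimentPlanner does.
+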
108
+ To illustrate this process, we have implemented a U-Net with a residual encoder
109
+ (see FabiansUNet in [generic_modular_residual_UNet.py](../nnunet/network_architecture/generic_modular_residual_UNet.py)).
110
+ This UNet has a class variable called use_this_for_3D_configuration. This value was found with the code located in
111
+ find_3d_configuration (same python file). The corresponding ExperimentPlanner
112
+ [ExperimentPlanner3DFabiansResUNet_v21](../nnunet/experiment_planning/alternative_experiment_planning/experiment_planner_residual_3DUNet_v21.py)
113
+ compares this value to values generated for the currently configured network topology (which are also computed by
114
+ FabiansUNet.compute_approx_vram_consumption) to ensure that the GPU memory target is met.
115
+
116
+ ## Tutorials
117
+ We have created tutorials on how to [manually edit plans files](tutorials/edit_plans_files.md),
118
+ [change the target spacing](tutorials/custom_spacing.md) and
119
+ [changing the normalization scheme for preprocessing](tutorials/custom_preprocessing.md).
documentation/inference_example_Prostate.md ADDED
@@ -0,0 +1,78 @@
1
+ # Example: inference with pretrained nnU-Net models
2
+
3
+ This is a step-by-step example of how to run inference with pretrained nnU-Net models on the Prostate dataset of the
4
+ Medical Segmentation Decathlon.
5
+
6
+ 1) Install nnU-Net by following the instructions [here](../readme.md#installation). Make sure to set all relevant paths,
7
+ also see [here](setting_up_paths.md). This step is necessary so that nnU-Net knows where to store trained models.
8
+ 2) Download the Prostate dataset of the Medical Segmentation Decathlon from
9
+ [here](https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2). Then extract the archive to a
10
+ destination of your choice.
11
+ 3) We selected the Prostate dataset for this example because we have a utility script that converts the test data into
12
+ the correct format.
13
+
14
+ Decathlon data come as 4D niftis. This is not compatible with nnU-Net (see dataset format specified
15
+ [here](dataset_conversion.md)). Convert the Prostate dataset into the correct format with
16
+
17
+ ```bash
18
+ nnUNet_convert_decathlon_task -i /xxx/Task05_Prostate
19
+ ```
20
+
21
+ Note that `Task05_Prostate` must be the folder that has the three 'imagesTr', 'labelsTr', 'imagesTs' subfolders!
22
+ The converted dataset can be found in `$nnUNet_raw_data_base/nnUNet_raw_data` ($nnUNet_raw_data_base is the folder for
23
+ raw data that you specified during installation)
24
+ 4) Download the pretrained model using this command:
25
+ ```bash
26
+ nnUNet_download_pretrained_model Task005_Prostate
27
+ ```
28
+ 5) The Prostate dataset requires two image modalities as input. This is very much like RGB images having three color channels.
29
+ nnU-Net recognizes modalities by the file ending: a single test case of the prostate dataset therefore consists of two files
30
+ `case_0000.nii.gz` and `case_0001.nii.gz`. Each of these files is a 3D image. The file ending with 0000.nii.gz must
31
+ always contain the T2 image and 0001.nii.gz the ADC image. Whenever you are using pretrained models, you can use
32
+ ```bash
33
+ nnUNet_print_pretrained_model_info Task005_Prostate
34
+ ```
35
+ to obtain information on which modality needs to get which number. The output for Prostate is the following:
36
+
37
+ Prostate Segmentation.
38
+ Segmentation targets are peripheral and central zone,
39
+ input modalities are 0: T2, 1: ADC.
40
+ Also see Medical Segmentation Decathlon, http://medicaldecathlon.com/
41
+ 6) The script we ran in 3) automatically converted the test data for us and stored them in
42
+ `$nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs`. Note that you need to do this conversion youself when
43
+ using other than Medcial Segmentation Decathlon datasets. No worries. Doing this is easy (often as simple as appending
44
+ a _0000 to the file name if only one input modality is required). Instructions can be found here [here](data_format_inference.md).
45
+ 7) You can now predict the Prostate test cases with the pretrained model. We exemplarily use the 3D full resoltion U-Net here:
46
+ ```bash
47
+ nnUNet_predict -i $nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs/ -o OUTPUT_DIRECTORY -t 5 -m 3d_fullres
48
+ ```
49
+ Note that `-t 5` specifies the task with id 5 (which corresponds to the Prostate dataset). You can also give the full
50
+ task name `Task005_Prostate`. `OUTPUT_DIRECTORY` is where the resulting segmentations are saved.
51
+
52
+ Predictions should be quite fast and you should be done within a couple of minutes. If you would like to speed it
53
+ up (at the expense of a slightly lower segmentation quality) you can disable test time data augmentation by
54
+ setting the `--disable_tta` flag (8x speedup). If this is still too slow for you, you can consider using only a
55
+ single model instead of the ensemble by specifying `-f 0`. This will use only the model trained on fold 0 of the
56
+ cross-validation for another 5x speedup.
57
+ 8) If you want to use an ensemble of different U-Net configurations for inference, you need to run the following commands:
58
+
59
+ Prediction with 3d full resolution U-Net (this command is a little different than the one above).
60
+ ```bash
61
+ nnUNet_predict -i $nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs/ -o OUTPUT_DIRECTORY_3D -t 5 --save_npz -m 3d_fullres
62
+ ```
63
+
64
+ Prediction with 2D U-Net
65
+ ```bash
66
+ nnUNet_predict -i $nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs/ -o OUTPUT_DIRECTORY_2D -t 5 --save_npz -m 2d
67
+ ```
68
+ `--save_npz` will tell nnU-Net to also store the softmax probabilities for ensembling.
69
+
70
+ You can then merge the predictions with
71
+ ```bash
72
+ nnUNet_ensemble -f OUTPUT_DIRECTORY_3D OUTPUT_DIRECTORY_2D -o OUTPUT_FOLDER_ENSEMBLE -pp POSTPROCESSING_FILE
73
+ ```
74
+ This will merge the predictions from `OUTPUT_DIRECTORY_2D` and `OUTPUT_DIRECTORY_3D`. `-pp POSTPROCESSING_FILE`
75
+ (optional!) is a file that gives nnU-Net information on how to postprocess the ensemble. These files were also
76
+ downloaded as part of the pretrained model weights and are located at `RESULTS_FOLDER/nnUNet/ensembles/
77
+ Task005_Prostate/ensemble_2d__nnUNetTrainerV2__nnUNetPlansv2.1--3d_fullres__nnUNetTrainerV2__nnUNetPlansv2.1/postprocessing.json`.
78
+ We will make the postprocessing files more accessible in a future (soon!) release.
documentation/setting_up_paths.md ADDED
@@ -0,0 +1,84 @@
1
+ # Setting up Paths
2
+
3
+ nnU-Net relies on environment variables to know where raw data, preprocessed data and trained model weights are stored.
4
+ To use the full functionality of nnU-Net, the following three environment variables must be set:
5
+
6
+ 1) nnUNet_raw_data_base: This is where nnU-Net finds the raw data and stores the cropped data. The folder located at
7
+ nnUNet_raw_data_base must have at least the subfolder nnUNet_raw_data, which in turn contains one subfolder for each Task.
8
+ It is the responsibility of the user to bring the raw data into the appropriate format - nnU-Net will then take care of
9
+ the rest ;-) For more information on the required raw data format, see [here](dataset_conversion.md).
10
+
11
+ Example tree structure:
12
+ ```
13
+ nnUNet_raw_data_base/nnUNet_raw_data/Task002_Heart
14
+ ├── dataset.json
15
+ ├── imagesTr
16
+ │   ├── la_003_0000.nii.gz
17
+ │   ├── la_004_0000.nii.gz
18
+ │   ├── ...
19
+ ├── imagesTs
20
+ │   ├── la_001_0000.nii.gz
21
+ │   ├── la_002_0000.nii.gz
22
+ │   ├── ...
23
+ └── labelsTr
24
+ ├── la_003.nii.gz
25
+ ├── la_004.nii.gz
26
+ ├── ...
27
+ nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/
28
+ ├── dataset.json
29
+ ├── imagesTr
30
+ │   ├── prostate_00_0000.nii.gz
31
+ │   ├── prostate_00_0001.nii.gz
32
+ │   ├── ...
33
+ ├── imagesTs
34
+ │   ├── prostate_03_0000.nii.gz
35
+ │   ├── prostate_03_0001.nii.gz
36
+ │   ├── ...
37
+ └── labelsTr
38
+ ├── prostate_00.nii.gz
39
+ ├── prostate_01.nii.gz
40
+ ├── ...
41
+ ```
42
+
43
+ 2) nnUNet_preprocessed: This is the folder where the preprocessed data will be saved. The data will also be read from
44
+ this folder during training. Therefore it is important that it is located on a drive with low access latency and high
45
+ throughput (a regular SATA or NVMe SSD is sufficient).
46
+
47
+ 3) RESULTS_FOLDER: This specifies where nnU-Net will save the model weights. If pretrained models are downloaded, this
48
+ is where it will save them.
49
+
50
+ ### How to set environment variables
51
+ (nnU-Net was developed for Ubuntu/Linux. The following guide is intended for this operating system and will not work on
52
+ others. We do not provide support for other operating systems!)
53
+
54
+ There are several ways you can do this. The most common one is to set the paths in your .bashrc file, which is located
55
+ in your home directory. For me, this file is located at /home/fabian/.bashrc. You can open it with any text editor of
56
+ choice. If you do not see the file, that may be because it is hidden by default. You can run `ls -al /home/fabian` to
57
+ ensure that you see it. In rare cases it may not be present and you can simply create it with `touch /home/fabian/.bashrc`.
58
+
59
+ Once the file is open in a text editor, add the following lines to the bottom:
60
+ ```
61
+ export nnUNet_raw_data_base="/media/fabian/nnUNet_raw"
62
+ export nnUNet_preprocessed="/media/fabian/nnUNet_preprocessed"
63
+ export RESULTS_FOLDER="/media/fabian/nnUNet_trained_models"
64
+ ```
65
+
66
+ (of course adapt the paths to your system and remember that nnUNet_preprocessed should be located on an SSD!)
67
+
68
+ Then save and exit. To be safe, make sure to reload the .bashrc by running `source /home/fabian/.bashrc`. Reloading
69
+ only needs to be done on terminal sessions that were already open before you saved the changes. Any new terminal you open
70
+ after will have these paths set. You can verify that the paths are set up properly by typing `echo $RESULTS_FOLDER`
71
+ etc and it should print out the correct folder.
72
+
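+ For example:
+
+ ```bash
+ source /home/fabian/.bashrc   # only needed in terminals that were already open
+ echo $nnUNet_raw_data_base
+ echo $nnUNet_preprocessed
+ echo $RESULTS_FOLDER
+ ```
+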
73
+ ### An alternative way of setting these paths
74
+ The method above sets the paths permanently (until you delete the lines from your .bashrc) on your system. If you wish
75
+ to set them only temporarily, you can run the export commands in your terminal:
76
+
77
+ ```
78
+ export nnUNet_raw_data_base="/media/fabian/nnUNet_raw"
79
+ export nnUNet_preprocessed="/media/fabian/nnUNet_preprocessed"
80
+ export RESULTS_FOLDER="/media/fabian/nnUNet_trained_models"
81
+ ```
82
+
83
+ This will set the paths for the current terminal session only (the variables will be lost if you close the terminal
84
+ and need to be reset every time).
documentation/training_example_Hippocampus.md ADDED
@@ -0,0 +1,40 @@
1
+ # Example: 3D U-Net training on the Hippocampus dataset
2
+
3
+ This is a step-by-step example on how to run a 3D full resolution Training with the Hippocampus dataset from the
4
+ Medical Segmentation Decathlon.
5
+
6
+ 1) Install nnU-Net by following the instructions [here](../readme.md#installation). Make sure to set all relevant paths,
7
+ also see [here](setting_up_paths.md). This step is necessary so that nnU-Net knows where to store raw data,
8
+ preprocessed data and trained models.
9
+ 2) Download the Hippocampus dataset of the Medical Segmentation Decathlon from
10
+ [here](https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2). Then extract the archive to a
11
+ destination of your choice.
12
+ 3) Decathlon data come as 4D niftis. This is not compatible with nnU-Net (see dataset format specified
13
+ [here](dataset_conversion.md)). Convert the Hippocampus dataset into the correct format with
14
+
15
+ ```bash
16
+ nnUNet_convert_decathlon_task -i /xxx/Task04_Hippocampus
17
+ ```
18
+
19
+ Note that `Task04_Hippocampus` must be the folder that has the three 'imagesTr', 'labelsTr', 'imagesTs' subfolders!
20
+ The converted dataset can be found in $nnUNet_raw_data_base/nnUNet_raw_data ($nnUNet_raw_data_base is the folder for
21
+ raw data that you specified during installation)
22
+ 4) You can now run nnU-Nets pipeline configuration (and the preprocessing) with the following line:
23
+ ```bash
24
+ nnUNet_plan_and_preprocess -t 4
25
+ ```
26
+ Where 4 refers to the task ID of the Hippocampus dataset.
27
+ 5) Now you can already start network training. This is how you train a 3d full resolution U-Net on the Hippocampus dataset:
28
+ ```bash
29
+ nnUNet_train 3d_fullres nnUNetTrainerV2 4 0
30
+ ```
31
+ nnU-Net by default runs all trainings as a 5-fold cross-validation. The command above will run only the training for the
32
+ first fold (fold 0). 4 is the task identifier of the Hippocampus dataset. Training one fold should take about 9
33
+ hours on a modern GPU.
34
+
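+ If you do want to run the full 5-fold cross-validation, you can simply loop over the folds:
+
+ ```bash
+ for FOLD in 0 1 2 3 4; do
+     nnUNet_train 3d_fullres nnUNetTrainerV2 4 $FOLD
+ done
+ ```
+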
35
+ This tutorial is only intended to demonstrate how easy it is to get nnU-Net running. You do not need to finish the
36
+ network training - pretrained models for the hippocampus task are available (see [here](../readme.md#run-inference)).
37
+
38
+ The only prerequisite for running nnU-Net on your custom dataset is to bring it into a structured, nnU-Net compatible
39
+ format. nnU-Net will take care of the rest. See [here](dataset_conversion.md) for instructions on how to convert
40
+ datasets into nnU-Net compatible format.
documentation/tutorials/custom_preprocessing.md ADDED
@@ -0,0 +1,60 @@
1
+ When you would like to change the way resampling during preprocessing is handled, or you would like to implement
2
+ a custom normalization scheme, you need to create a new custom preprocessor class and an ExperimentPlanner to go along
3
+ with it. While this may appear cumbersome, the great thing about this approach is that the same code will be used for
4
+ inference as well, thus guaranteeing that images are preprocessed properly (i.e. the way the model expects).
5
+
6
+ In this tutorial we will implement a custom normalization scheme for the Task120 Massachusetts Road Segmentation. Make
7
+ sure to download the dataset and run the code in [Task120_Massachusetts_RoadSegm.py](../../nnunet/dataset_conversion/Task120_Massachusetts_RoadSegm.py) prior to this tutorial.
8
+
9
+ The images in the dataset are RGB with a value range of [0, 255]. nnU-Net's default normalization scheme will normalize
10
+ each color channel independently to have mean 0 and standard deviation 1. This works reasonably well, but may result
11
+ in a shift of the color channels relative to each other and thus disturb the model's performance. To address that, the new
12
+ normalization will rescale the value range from [0, 255] to [0, 1] by simply dividing the intensities of each image by
13
+ 255. Thus, there will no longer be a shift between the color channels.
14
+
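+ Conceptually, the new scheme boils down to this (a sketch of the idea only; the actual implementation lives in the preprocessor class referenced below):
+
+ ```python
+ import numpy as np
+
+ def scale_rgb_to_0_1(data: np.ndarray) -> np.ndarray:
+     """data: array of shape (channels, x, y) with values in [0, 255]."""
+     return data.astype(np.float32) / 255.0
+ ```
+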
15
+ The new preprocessor class is located in [preprocessor_scale_RGB_to_0_1.py](../../nnunet/preprocessing/custom_preprocessors/preprocessor_scale_RGB_to_0_1.py).
16
+ To acutally use it, we need to tell the ExperimentPlanner its name. For this purpose, it is best to create a new
17
+ ExperimentPlanner class. I created one and placed it in [experiment_planner_2DUNet_v21_RGB_scaleto_0_1.py](../../nnunet/experiment_planning/alternative_experiment_planning/normalization/experiment_planner_2DUNet_v21_RGB_scaleto_0_1.py).
18
+
19
+ Now go have a look at these two classes. Details are in the comments there.
20
+
21
+ To run the new preprocessor, you need to specify its accompanying ExperimentPlanner when running
22
+ `nnUNet_plan_and_preprocess`:
23
+
24
+ ```bash
25
+ nnUNet_plan_and_preprocess -t 120 -pl3d None -pl2d ExperimentPlanner2D_v21_RGB_scaleTo_0_1
26
+ ```
27
+
28
+ After that you can run the training:
29
+
30
+ ```bash
31
+ nnUNet_train 2d nnUNetTrainerV2 120 FOLD -p nnUNet_RGB_scaleTo_0_1
32
+ ```
33
+
34
+ Note that `nnUNet_RGB_scaleTo_0_1` is the plans identifier defined in our custom ExperimentPlanner. Specify it for all
35
+ nnUNet_* commands whenever you want to use the models resulting from this training.
36
+
37
+ Now let all 5 folds run for the original nnU-Net as well as the one that uses the newly defined normalization scheme.
38
+ To compare the results, you can make use of nnUNet_determine_postprocessing to get the necessary metrics, for example:
39
+
40
+ ```bash
41
+ nnUNet_determine_postprocessing -t 120 -tr nnUNetTrainerV2 -p nnUNet_RGB_scaleTo_0_1
42
+ ```
43
+
44
+ This will create a `cv_niftis_raw` and `cv_niftis_postprocessed` subfolder in the training output directory. In each
45
+ of these folders is a summary.json file that you can open with a regular text editor. In this file, there are metrics
46
+ for each training example in the dataset representing the outcome of the 5-fold cross-validation. At the very bottom
47
+ of the file, the metrics are aggregated through averaging (field "mean") and this is what you should be using to
48
+ compare the experiments. I recommend using the non-postprocessed summary.json (located in `cv_niftis_raw`) for this
49
+ because determining the postprocessing may actually overfit to the training dataset. Here are the results I obtained:
50
+
51
+ Vanilla nnU-Net: 0.7720\
52
+ new normalization scheme: 0.7711
53
+
54
+ (no improvement but hey it was worth a try!)
55
+
56
+ Remember to always place custom ExperimentPlanners in nnunet.experiment_planning (any file or submodule) and
57
+ preprocessors in nnunet.preprocessing (any file or submodule). Make sure to use unique names!
58
+
59
+ The example classes from this tutorial only work with 2D. You need to generate a separate set of planner and preprocessor
60
+ for 3D data (cumbersome, I know. Needs to be improved in the future).
documentation/tutorials/custom_spacing.md ADDED
@@ -0,0 +1,33 @@
1
+ Sometimes you want to set custom target spacings. This is done by creating a custom ExperimentPlanner.
2
+ Let's run this with the Task002_Heart example from the Medical Segmentation Decathlon. This dataset is not too large
3
+ and working with it is therefore a breeze!
4
+
5
+ This example requires you to have downloaded the dataset and converted it to nnU-Net format with
6
+ nnUNet_convert_decathlon_task
7
+
8
+ We need to run the nnUNet_plan_and_preprocess command with a custom 3d experiment planner to achieve this. I have
9
+ created an appropriate ExperimentPlanner and placed it in [experiment_planner_baseline_3DUNet_v21_customTargetSpacing_2x2x2.py](../../nnunet/experiment_planning/alternative_experiment_planning/target_spacing/experiment_planner_baseline_3DUNet_v21_customTargetSpacing_2x2x2.py)
10
+
11
+ This will set a hard-coded target spacing of 2x2x2 mm for the 3d_fullres configuration (3d_lowres is unchanged).
12
+ Go have a look at this ExperimentPlanner now.
13
+
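+ Its core looks roughly like this (a sketch; the real class in the linked file additionally sets a unique data_identifier and plans_fname so that nothing is overwritten):
+
+ ```python
+ import numpy as np
+ from nnunet.experiment_planning.experiment_planner_baseline_3DUNet_v21 import ExperimentPlanner3D_v21
+
+
+ class ExperimentPlanner3D_v21_customTargetSpacing_2x2x2(ExperimentPlanner3D_v21):
+     def get_target_spacing(self):
+         # ignore the dataset-derived spacing and always resample to 2x2x2 mm
+         return np.array([2., 2., 2.])
+ ```
+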
14
+ To run nnUNet_plan_and_preprocess with the new ExperimentPlanner, simply specify it:
15
+
16
+ `nnUNet_plan_and_preprocess -t 2 -pl2d None -pl3d ExperimentPlanner3D_v21_customTargetSpacing_2x2x2`
17
+
18
+ Note how we are disabling 2D preprocessing with `-pl2d None`. The ExperimentPlanner I created is only for 3D.
19
+ You will need to generate a separate one for 2D.
20
+
21
+ Once this is completed your task will have been preprocessed with the desired target spacing. You can use it by
22
+ specifying the new custom plans file that is linked to it (see
23
+ `ExperimentPlanner3D_v21_customTargetSpacing_2x2x2` source code) when running any nnUNet_* command, for example:
24
+
25
+ `nnUNet_train 3d_fullres nnUNetTrainerV2 2 FOLD -p nnUNetPlansv2.1_trgSp_2x2x2`
26
+
27
+ (make sure to omit the `_plans_3D.pkl` suffix!)
28
+
29
+ **TODO**: how to compare with the default run?
30
+
31
+ IMPORTANT: When creating custom ExperimentPlanners, make sure to always place them under a unique class name somewhere
32
+ in the nnunet.experiment_planning module. If you create subfolders, make sure they contain an __init__.py file
33
+ (can be empty). If you fail to do so nnU-Net will not be able to locate your ExperimentPlanner and crash!
documentation/tutorials/edit_plans_files.md ADDED
@@ -0,0 +1,141 @@
1
+ Changing the plans files grants you a lot of flexibility: You can depart from nnU-Net's default configuration and play
2
+ with different U-Net topologies, batch sizes and patch sizes. It is a powerful tool!
3
+ To better understand the components describing the network topology in our plans files, please read section 6.2
4
+ in the [supplementary information](https://static-content.springer.com/esm/art%3A10.1038%2Fs41592-020-01008-z/MediaObjects/41592_2020_1008_MOESM1_ESM.pdf)
5
+ (page 13) of our paper!
6
+
7
+ The goal of this tutorial is to demonstrate how to read and modify plans files and how to use them in your
8
+ experiments. The file used here works with Task120 and requires you to have downloaded the dataset, run
9
+ nnunet.dataset_conversion.Task120_Massachusetts_RoadSegm.py and then run nnUNet_plan_and_preprocess for it.
10
+
11
+ Note that this task is 2D only, but the same principles we use here can be easily extended to 3D and other tasks as well.
12
+
13
+ The output of `nnUNet_plan_and_preprocess` for this task looks like this:
14
+
15
+ [{'batch_size': 2,
16
+ 'num_pool_per_axis': [8, 8],
17
+ 'patch_size': array([1280, 1024]),
18
+ 'median_patient_size_in_voxels': array([ 1, 1500, 1500]),
19
+ 'current_spacing': array([999., 1., 1.]),
20
+ 'original_spacing': array([999., 1., 1.]),
21
+ 'pool_op_kernel_sizes': [[2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2]],
22
+ 'conv_kernel_sizes': [[3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3]],
23
+ 'do_dummy_2D_data_aug': False}]
24
+
25
+ This is also essentially what is saved in the plans file under the key 'plans_per_stage'
26
+
27
+ For this task, nnU-Net intends to use a patch size of 1280x1024 and a U-Net architecture with 8 pooling
28
+ operations per axis. Due to GPU memory constraints, the batch size is just 2.
29
+
30
+ Knowing the dataset we could hypothesize that a different approach might produce better results: The decision
31
+ of whether a pixel belongs to 'road' or not does not depend on the large contextual information that the large
32
+ patch size (and U-Net architecture) offer and could instead be made with more local information. Training with
33
+ a batch size of just 2 in a dataset with 800 training cases means that each batch contains only limited variability.
34
+ So one possible conclusion could be that smaller patches but larger batch sizes might result in a better
35
+ segmentation outcome. Let's investigate (using the same GPU memory constraint, determined manually with trial
36
+ and error!):
37
+
38
+ Variant 1: patch size 512x512, batch size 12
39
+ The following snippet makes the necessary adaptations to the plans file
40
+
41
+ ```python
42
+ from batchgenerators.utilities.file_and_folder_operations import *
43
+ import numpy as np
44
+ from nnunet.paths import preprocessing_output_dir
45
+ task_name = 'Task120_MassRoadsSeg'
46
+
47
+ # if it breaks upon loading the plans file, make sure to run the Task120 dataset conversion and
48
+ # nnUNet_plan_and_preprocess first!
49
+ plans_fname = join(preprocessing_output_dir, task_name, 'nnUNetPlansv2.1_plans_2D.pkl')
50
+ plans = load_pickle(plans_fname)
51
+ plans['plans_per_stage'][0]['batch_size'] = 12
52
+ plans['plans_per_stage'][0]['patch_size'] = np.array((512, 512))
53
+ plans['plans_per_stage'][0]['num_pool_per_axis'] = [7, 7]
54
+ # because we changed the num_pool_per_axis, we need to change conv_kernel_sizes and pool_op_kernel_sizes as well!
55
+ plans['plans_per_stage'][0]['pool_op_kernel_sizes'] = [[2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2]]
56
+ plans['plans_per_stage'][0]['conv_kernel_sizes'] = [[3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3]]
57
+ # for a network with num_pool_per_axis [7,7] the correct length of pool kernel sizes is 7 and the length of conv
58
+ # kernel sizes is 8! Note that you can also change these numbers if you believe it makes sense. A pool kernel size
59
+ # of 1 will result in no pooling along that axis, a kernel size of 3 will reduce the size of the feature map
60
+ # representations by a factor of 3 instead of 2.
61
+
62
+ # save the plans under a new plans name. Note that the new plans file must end with _plans_2D.pkl!
63
+ save_pickle(plans, join(preprocessing_output_dir, task_name, 'nnUNetPlansv2.1_ps512_bs12_plans_2D.pkl'))
64
+ ```
65
+
66
+
67
+ Variant 2: patch size 256x256, batch size 60
68
+
69
+ ```python
70
+ from batchgenerators.utilities.file_and_folder_operations import *
71
+ import numpy as np
72
+ from nnunet.paths import preprocessing_output_dir
73
+ task_name = 'Task120_MassRoadsSeg'
74
+ plans_fname = join(preprocessing_output_dir, task_name, 'nnUNetPlansv2.1_plans_2D.pkl')
75
+ plans = load_pickle(plans_fname)
76
+ plans['plans_per_stage'][0]['batch_size'] = 60
77
+ plans['plans_per_stage'][0]['patch_size'] = np.array((256, 256))
78
+ plans['plans_per_stage'][0]['num_pool_per_axis'] = [6, 6]
79
+ plans['plans_per_stage'][0]['pool_op_kernel_sizes'] = [[2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2]]
80
+ plans['plans_per_stage'][0]['conv_kernel_sizes'] = [[3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3]]
81
+ save_pickle(plans, join(preprocessing_output_dir, task_name, 'nnUNetPlansv2.1_ps256_bs60_plans_2D.pkl'))
82
+ ```
83
+
84
+ You can now use these custom plans files to train the networks and compare the results! Remember that all nnUNet_*
85
+ commands have the -h argument to display their arguments. nnUNet_train supports custom plans via the -p argument.
86
+ Custom plans must be the prefix, so here this is everything except '_plans_2D.pkl':
87
+
88
+ Variant 1:
89
+ ```bash
90
+ nnUNet_train 2d nnUNetTrainerV2 120 FOLD -p nnUNetPlansv2.1_ps512_bs12
91
+ ```
92
+
93
+ Variant 2:
94
+ ```bash
95
+ nnUNet_train 2d nnUNetTrainerV2 120 FOLD -p nnUNetPlansv2.1_ps256_bs60
96
+ ```
97
+
98
+
99
+ Let all 5 folds run for each plans file (original and the two variants). To compare the results, you can make use of
100
+ nnUNet_determine_postprocessing to get the necessary metrics, for example:
101
+
102
+ ```bash
103
+ nnUNet_determine_postprocessing -t 120 -tr nnUNetTrainerV2 -p nnUNetPlansv2.1_ps512_bs12 -m 2d
104
+ ```
105
+
106
+ This will create a `cv_niftis_raw` and `cv_niftis_postprocessed` subfolder in the training output directory. In each
107
+ of these folders is a summary.json file that you can open with a regular text editor. In this file, there are metrics
108
+ for each training example in the dataset representing the outcome of the 5-fold cross-validation. At the very bottom
109
+ of the file, the metrics are aggregated through averaging (field "mean") and this is what you should be using to
110
+ compare the experiments. I recommend using the non-postprocessed summary.json (located in `cv_niftis_raw`) for this
111
+ because determining the postprocessing may actually overfit to the training dataset. Here are the results I obtained:
112
+
113
+ Vanilla nnU-Net: 0.7720\
114
+ Variant 1: 0.7724\
115
+ Variant 2: 0.7734
116
+
117
+ The results are remarkably similar and I would not necessarily conclude that such a small improvement in Dice is a
118
+ significant outcome. Nonetheless it was worth a try :-)
119
+
120
+ Despite the results shown here I would like to emphasize that modifying the plans file can be an extremely powerful
121
+ tool to improve the performance of nnU-Net on some datasets. You never know until you try it.
122
+
123
+ **ADDITIONAL INFORMATION (READ THIS!)**
124
+
125
+ - when working with 3d plans ('nnUNetPlansv2.1_plans_3D.pkl') the 3d_lowres and 3d_fullres stage will be encoded
126
+ in the same plans file. If len(plans['plans_per_stage']) == 2, then [0] is the 3d_lowres and [1] is the
127
+ 3d_fullres variant. If len(plans['plans_per_stage']) == 1 then [0] will be 3d_fullres and 3d_cascade_fullres
128
+ (they use the same plans).
129
+
130
+ - 'pool_op_kernel_sizes' together with 'patch_size' determines the size of the feature map
131
+ representations at the bottleneck. For Variant 1 & 2 presented here, the size of the feature map representation is
132
+
133
+ `print(plans['plans_per_stage'][0]['patch_size'] / np.prod(plans['plans_per_stage'][0]['pool_op_kernel_sizes'], 0))`
134
+
135
+ > [4., 4.]
136
+
137
+ If you see a non-integer number here, your model will crash! Make sure these are always integers!
138
+ nnU-Net will never create smaller bottlenecks than 4!
139
+
140
+ - do not change the 'current_spacing' in the plans file! This will not work properly. To change the target spacing,
141
+ have a look at the [custom spacing](custom_spacing.md) tutorial.
documentation/using_nnUNet_as_baseline.md ADDED
@@ -0,0 +1,4 @@
1
+ (The U-Net is the current punching bag of methods development. nnU-Net is going to be that looking forward. That is
2
+ cool (great, in fact!), but it should be done correctly. Here are tips on how to benchmark against nnU-Net)
3
+
4
+ This is work in progress
evaluate_nnUNet.py ADDED
@@ -0,0 +1,656 @@
1
+ from argparse import ArgumentParser
2
+ import os
3
+ import numpy as np
4
+ from sklearn.metrics import precision_score, recall_score, f1_score, jaccard_score
5
+ from data_processing.data_postprocessing import postprocess_zone_segmenation, postprocess_front_segmenation, extract_front_from_zones
6
+ import torch.nn as nn
7
+ #from segmentation_models_pytorch.losses.dice import DiceLoss
8
+ #from PIL import Image
9
+ #from models.front_segmentation_model import DistanceMapBCE
10
+ import re
11
+ from pathlib import Path
12
+ import cv2
13
+ import scipy.stats as st
14
+ from scipy.spatial import distance
15
+ import skimage
16
+ import matplotlib.pyplot as plt
17
+ from skimage.morphology import skeletonize
18
+ import json
19
+ import plotly.graph_objects as go
20
+ import plotly.express as px
21
+ import plotly.io as pio
22
+ import os
23
+ pio.kaleido.scope.mathjax = None
24
+
25
+
26
+ def front_error(prediction, label):
27
+ """
28
+ prediction: mask of the front prediction (WxH)
29
+ label: mask of the front label (WxH)
30
+
31
+ returns the mean distance of the two fronts
32
+ """
33
+ front_is_present_flag = True
34
+ polyline_pred = np.nonzero(prediction)
35
+ polyline_label = np.nonzero(label)
36
+
37
+ # Generate Nx2 matrix of pixels that represent the front
38
+ pred_coords = np.array(list(zip(polyline_pred[0], polyline_pred[1])))
39
+ mask_coords = np.array(list(zip(polyline_label[0], polyline_label[1])))
40
+
41
+ # Return NaN if front is not detected in either pred or mask
42
+ if pred_coords.shape[0] == 0 or mask_coords.shape[0] == 0:
43
+ front_is_present_flag = False
44
+ return front_is_present_flag, np.nan, np.nan, np.nan
45
+
46
+ # Generate the pairwise distances between each point and the closest point in the other array
47
+
48
+ distances1 = distance.cdist(pred_coords, mask_coords).min(axis=1)
49
+
50
+ distances2 = distance.cdist(mask_coords, pred_coords).min(axis=1)
51
+ distances = np.concatenate((distances1, distances2))
52
+
53
+ # Calculate the average distance between each point and the closest point in the other array
54
+ mean_distance = np.mean(distances)
55
+ median_distance = np.median(distances)
56
+ return front_is_present_flag, mean_distance, median_distance, distances
57
+
58
+
59
+ def multi_class_metric(metric_function, complete_predicted_mask, complete_target):
60
+ metrics = []
61
+ metric_na, metric_stone, metric_glacier, metric_ocean = metric_function(np.ndarray.flatten(complete_target),
62
+ np.ndarray.flatten(complete_predicted_mask),
63
+ average=None)
64
+ metric_macro_average = (metric_na + metric_stone + metric_glacier + metric_ocean) / 4
65
+ metrics.append(metric_macro_average)
66
+ metrics.append(metric_na)
67
+ metrics.append(metric_stone)
68
+ metrics.append(metric_glacier)
69
+ metrics.append(metric_ocean)
70
+ return metrics
71
+
72
+
73
+ def get_matching_out_of_folder(file_name, folder):
74
+ files = os.listdir(folder)
75
+ matching_files = [a for a in files if
76
+ re.match(pattern=os.path.split(file_name)[1][:-4], string=os.path.split(a)[1])]
77
+ if len(matching_files) > 1:
78
+ print("Something went wrong!")
79
+ print(f"targets_matching: {matching_files}")
80
+ if len(matching_files) < 1:
81
+ print("Something went wrong! No matches found")
82
+ return matching_files[0]
83
+
84
+
85
+ def turn_colors_to_class_labels_zones(mask):
86
+ mask_class_labels = np.copy(mask)
87
+ mask_class_labels[mask == 0] = 0
88
+ mask_class_labels[mask == 64] = 1
89
+ mask_class_labels[mask == 127] = 2
90
+ mask_class_labels[mask == 254] = 3
91
+ return mask_class_labels
92
+
93
+
94
+ def turn_colors_to_class_labels_front(mask):
95
+ mask_class_labels = np.copy(mask)
96
+ mask_class_labels[mask == 0] = 0
97
+ mask_class_labels[mask == 255] = 1
98
+ return mask_class_labels
99
+
100
+
101
+ def print_zone_metrics(metric_name, list_of_metrics):
102
+ metrics = [metric for [metric, _, _, _, _] in list_of_metrics if not np.isnan(metric)]
103
+ metrics_na = [metric_na for [_, metric_na, _, _, _] in list_of_metrics if not np.isnan(metric_na)]
104
+ metrics_stone = [metric_stone for [_, _, metric_stone, _, _] in list_of_metrics if not np.isnan(metric_stone)]
105
+ metrics_glacier = [metric_glacier for [_, _, _, metric_glacier, _] in list_of_metrics if not np.isnan(metric_glacier)]
106
+ metrics_ocean = [metric_ocean for [_, _, _, _, metric_ocean] in list_of_metrics if not np.isnan(metric_ocean)]
107
+ result = {}
108
+ print(f"Average {metric_name}: {sum(metrics) / len(metrics)}")
109
+ result[f'Average_{metric_name}'] = sum(metrics) / len(metrics)
110
+ print(f"Average {metric_name} NA Area: {sum(metrics_na) / len(metrics_na)}")
111
+ result[f'Average_{metric_name}_NA_Area'] = sum(metrics_na) / len(metrics_na)
112
+ print(f"Average {metric_name} Stone: {sum(metrics_stone) / len(metrics_stone)}")
113
+ result[f"Average_{metric_name}_Stone"] = sum(metrics_stone) / len(metrics_stone)
114
+ print(f"Average {metric_name} Glacier: {sum(metrics_glacier) / len(metrics_glacier)}")
115
+ result[f"Average_{metric_name}_Glacier"] = sum(metrics_glacier) / len(metrics_glacier)
116
+ print(f"Average {metric_name} Ocean and Ice Melange: {sum(metrics_ocean) / len(metrics_ocean)}")
117
+ result[f"Average_{metric_name}_Ocean_and_Ice_Melange"] = sum(metrics_ocean) / len(metrics_ocean)
118
+
119
+ return result
120
+
121
+ def print_front_metric(name, metric):
122
+ result = {}
123
+ print(f"Average {name}: {sum(metric) / len(metric)}")
124
+ result[f"Average {name}"] = sum(metric) / len(metric)
125
+ return result
126
+
127
+
128
+ def mask_prediction_with_bounding_box(post_complete_predicted_mask, file_name, bounding_boxes_directory):
129
+ matching_bounding_box_file = get_matching_out_of_folder(file_name, bounding_boxes_directory)
130
+ with open(os.path.join(bounding_boxes_directory, matching_bounding_box_file)) as f:
131
+ coord_file_lines = f.readlines()
132
+ left_upper_corner_x, left_upper_corner_y = [round(float(coord)) for coord in coord_file_lines[1].split(",")]
133
+ left_lower_corner_x, left_lower_corner_y = [round(float(coord)) for coord in coord_file_lines[2].split(",")]
134
+ right_lower_corner_x, right_lower_corner_y = [round(float(coord)) for coord in coord_file_lines[3].split(",")]
135
+ right_upper_corner_x, right_upper_corner_y = [round(float(coord)) for coord in coord_file_lines[4].split(",")]
136
+
137
+ # Make sure the Bounding Box coordinates are within the image
138
+ if left_upper_corner_x < 0: left_upper_corner_x = 0
139
+ if left_lower_corner_x < 0: left_lower_corner_x = 0
140
+ if right_upper_corner_x > len(post_complete_predicted_mask[0]): right_upper_corner_x = len(post_complete_predicted_mask[0]) - 1
141
+ if right_lower_corner_x > len(post_complete_predicted_mask[0]): right_lower_corner_x = len(post_complete_predicted_mask[0]) - 1
142
+ if left_upper_corner_y > len(post_complete_predicted_mask): left_upper_corner_y = len(post_complete_predicted_mask) - 1
143
+ if left_lower_corner_y < 0: left_lower_corner_y = 0
144
+ if right_upper_corner_y > len(post_complete_predicted_mask): right_upper_corner_y = len(post_complete_predicted_mask) - 1
145
+ if right_lower_corner_y < 0: right_lower_corner_y = 0
146
+
147
+ # remember cv2 images have the shape (height, width)
148
+ post_complete_predicted_mask[:right_lower_corner_y, :] = 0.0
149
+ post_complete_predicted_mask[left_upper_corner_y:, :] = 0.0
150
+ post_complete_predicted_mask[:, :left_upper_corner_x] = 0.0
151
+ post_complete_predicted_mask[:, right_lower_corner_x:] = 0.0
152
+
153
+ return post_complete_predicted_mask
154
+
155
+
156
+ def post_processing(target_masks, complete_predicted_masks, bounding_boxes_directory, complete_test_directory):
157
+ meter_threshold = 750 # in meter
158
+ print("Post-processing ...\n\n")
159
+ for file_name in complete_predicted_masks:
160
+ prediction_name = file_name
161
+ if file_name.endswith('_zone.png'):
162
+ file_name = file_name[:-len("_zone.png")] + ".png"
163
+ if file_name.endswith('_front.png'):
164
+ file_name = file_name[:-len("_front.png")] + ".png"
165
+
166
+ print(f"File: {file_name}")
167
+ resolution = int(os.path.split(file_name)[1][:-4].split('_')[-3])
168
+ # pixel_threshold (pixel) * resolution (m/pixel) = meter_threshold (m)
169
+ pixel_threshold = meter_threshold / resolution
170
+ complete_predicted_mask = cv2.imread(os.path.join(complete_test_directory, prediction_name).__str__(), cv2.IMREAD_GRAYSCALE)
171
+
172
+ if target_masks == "zones":
173
+ post_complete_predicted_mask = postprocess_zone_segmenation(complete_predicted_mask)
174
+ post_complete_predicted_mask = extract_front_from_zones(post_complete_predicted_mask, pixel_threshold)
175
+ else:
176
+ complete_predicted_mask_class_labels = turn_colors_to_class_labels_front(complete_predicted_mask)
177
+ post_complete_predicted_mask = postprocess_front_segmenation(complete_predicted_mask_class_labels, pixel_threshold)
178
+ post_complete_predicted_mask = post_complete_predicted_mask * 255
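+ # scale the binary front mask from {0, 1} to {0, 255} so cv2.imwrite stores a visible 8-bit image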
179
+
180
+ post_complete_predicted_mask = mask_prediction_with_bounding_box(post_complete_predicted_mask, file_name,
181
+ bounding_boxes_directory)
182
+ cv2.imwrite(os.path.join(complete_postprocessed_test_directory, file_name), post_complete_predicted_mask)
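+ # note: complete_postprocessed_test_directory is a module-level name defined in the __main__ block below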
183
+
184
+
185
+ def calculate_front_delineation_metric(complete_postprocessed_test_directory, post_processed_predicted_masks, directory_of_target_fronts, bounding_boxes_directory):
186
+ list_of_mean_front_errors = []
187
+ list_of_median_front_errors = []
188
+ list_of_all_front_errors = []
189
+ number_of_images_with_no_predicted_front = 0
190
+ results = {}
191
+ for file_name in post_processed_predicted_masks[:]:
192
+
193
+ post_processed_predicted_mask = cv2.imread(
194
+ os.path.join(complete_postprocessed_test_directory, file_name).__str__(), cv2.IMREAD_GRAYSCALE)
195
+ matching_target_file = get_matching_out_of_folder(file_name, directory_of_target_fronts)
196
+ target_front = cv2.imread(os.path.join(directory_of_target_fronts, matching_target_file).__str__(),
197
+ cv2.IMREAD_GRAYSCALE)
198
+ if file_name.endswith("_front.png"):
199
+ resolution = int(os.path.split(file_name)[1][:-4].split('_')[-4])
200
+ else:
201
+ resolution = int(os.path.split(file_name)[1][:-4].split('_')[-3])
202
+
203
+ # images need to be turned into a Tensor [0, ..., n_classes-1]
204
+ post_processed_predicted_mask_class_labels = turn_colors_to_class_labels_front(post_processed_predicted_mask)
205
+ target_front_class_labels = turn_colors_to_class_labels_front(target_front)
206
+
207
+ if file_name.endswith('_front.png'):
208
+ post_processed_predicted_mask_class_labels = mask_prediction_with_bounding_box(post_processed_predicted_mask_class_labels, file_name[:-len('_front.png')]+'.png', bounding_boxes_directory)
209
+ post_processed_predicted_mask_class_labels = skeletonize(post_processed_predicted_mask_class_labels)
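+ # thin the predicted front to a one-pixel-wide curve so the distance error is measured between comparable line representations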
210
+ front_is_present_flag, mean_error, median_error, errors = front_error(
211
+ post_processed_predicted_mask_class_labels, target_front_class_labels)
212
+
213
+ if not front_is_present_flag:
214
+ number_of_images_with_no_predicted_front += 1
215
+ else:
216
+ list_of_mean_front_errors.append(resolution * mean_error)
217
+ list_of_median_front_errors.append(resolution * median_error)
218
+ list_of_all_front_errors = np.concatenate((list_of_all_front_errors, resolution * errors))
219
+ print(f"Number of images with no predicted front: {number_of_images_with_no_predicted_front}")
220
+ results["Number_no_front"] = number_of_images_with_no_predicted_front
221
+ if number_of_images_with_no_predicted_front >= len(post_processed_predicted_masks):
222
+ print(f"Number of images with no predicted front is equal to complete set of images. No metrics can be calculated.")
223
+ return [], {}
224
+ list_of_mean_front_errors_without_nan = [front_error for front_error in list_of_mean_front_errors if
225
+ not np.isnan(front_error)]
226
+ list_of_median_front_errors_without_nan = [front_error for front_error in list_of_median_front_errors if
227
+ not np.isnan(front_error)]
228
+ print(f"Mean-mean distance error (in meters): {sum(list_of_mean_front_errors_without_nan) / len(list_of_mean_front_errors_without_nan)}")
229
+ results["Mean_mean_distance"] = sum(list_of_mean_front_errors_without_nan) / len(list_of_mean_front_errors_without_nan)
230
+ print(f"Mean-median distance error (in meters): {sum(list_of_median_front_errors_without_nan) / len(list_of_median_front_errors_without_nan)}")
231
+ results["Mean_median_distance"] = sum(list_of_median_front_errors_without_nan) / len(list_of_median_front_errors_without_nan)
232
+
233
+ list_of_mean_front_errors_without_nan = np.array(list_of_mean_front_errors_without_nan)
234
+ list_of_median_front_errors_without_nan = np.array(list_of_median_front_errors_without_nan)
235
+ print(f"Median-mean distance error (in meters): {np.median(list_of_mean_front_errors_without_nan)}")
236
+ results["Median_mean_distance"] = np.median(list_of_mean_front_errors_without_nan)
237
+ print(f"Median-median distance error (in meters): {np.median(list_of_median_front_errors_without_nan)}")
238
+ results["Median_median_distance"] = np.median(list_of_median_front_errors_without_nan)
239
+
240
+ list_of_all_front_errors_without_nan = [front_error for front_error in list_of_all_front_errors if
241
+ not np.isnan(front_error)]
242
+ list_of_all_front_errors_without_nan = np.array(list_of_all_front_errors_without_nan)
243
+ confidence_interval = st.norm.interval(0.95,  # passed positionally; the 'alpha' keyword was renamed 'confidence' in SciPy 1.9
244
+ loc=np.mean(list_of_all_front_errors_without_nan),
245
+ scale=st.sem(list_of_all_front_errors_without_nan))
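+ # i.e. a 95% normal-approximation interval, mean +/- 1.96 * standard error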
246
+ mean = np.mean(list_of_all_front_errors_without_nan)
247
+ std = np.std(list_of_all_front_errors_without_nan)
248
+ print(f"Confidence interval: {confidence_interval}, mean: {mean}, standard deviation: {std}")
249
+ results["Confidence_interval"] = confidence_interval
250
+ results['mean'] = mean
251
+ results['standard_deviation'] = std
252
+ return list_of_mean_front_errors_without_nan, results
253
+
254
+ def calculate_segmentation_metrics(target_mask_modality, complete_predicted_masks, complete_test_directory,
255
+ directory_of_complete_targets):
256
+ print("Calculate segmentation metrics ...\n\n")
257
+ list_of_ious = []
258
+ list_of_precisions = []
259
+ list_of_recalls = []
260
+ list_of_f1_scores = []
261
+ result = {}
262
+ for file_name in complete_predicted_masks:
263
+ print(f"File: {file_name}")
264
+ complete_predicted_mask = cv2.imread(os.path.join(complete_test_directory, file_name).__str__(),
265
+ cv2.IMREAD_GRAYSCALE)
266
+ matching_target_file = get_matching_out_of_folder(file_name, directory_of_complete_targets)
267
+ complete_target = cv2.imread(os.path.join(directory_of_complete_targets, matching_target_file).__str__(),
268
+ cv2.IMREAD_GRAYSCALE)
269
+
270
+ if target_mask_modality == "zones":
271
+ # images need to be turned into a Tensor [0, ..., n_classes-1]
272
+ complete_predicted_mask_class_labels = turn_colors_to_class_labels_zones(complete_predicted_mask)
273
+ complete_target_class_labels = turn_colors_to_class_labels_zones(complete_target)
274
+ # Segmentation evaluation metrics
275
+ list_of_ious.append(
276
+ multi_class_metric(jaccard_score, complete_predicted_mask_class_labels, complete_target_class_labels))
277
+ list_of_precisions.append(
278
+ multi_class_metric(precision_score, complete_predicted_mask_class_labels, complete_target_class_labels))
279
+ list_of_recalls.append(
280
+ multi_class_metric(recall_score, complete_predicted_mask_class_labels, complete_target_class_labels))
281
+ list_of_f1_scores.append(
282
+ multi_class_metric(f1_score, complete_predicted_mask_class_labels, complete_target_class_labels))
283
+ else:
284
+ # images need to be turned into a Tensor [0, ..., n_classes-1]
285
+ complete_predicted_mask_class_labels = turn_colors_to_class_labels_front(complete_predicted_mask)
286
+ complete_target_class_labels = turn_colors_to_class_labels_front(complete_target)
287
+ # Segmentation evaluation metrics
288
+ flattened_complete_target_class_labels = np.ndarray.flatten(complete_target_class_labels)
289
+ flattened_complete_predicted_mask_class_labels = np.ndarray.flatten(complete_predicted_mask_class_labels)
290
+ list_of_ious.append(
291
+ jaccard_score(flattened_complete_target_class_labels, flattened_complete_predicted_mask_class_labels))
292
+ list_of_precisions.append(
293
+ precision_score(flattened_complete_target_class_labels, flattened_complete_predicted_mask_class_labels))
294
+ list_of_recalls.append(
295
+ recall_score(flattened_complete_target_class_labels, flattened_complete_predicted_mask_class_labels))
296
+ list_of_f1_scores.append(
297
+ f1_score(flattened_complete_target_class_labels, flattened_complete_predicted_mask_class_labels))
298
+
299
+ if target_mask_modality == "zones":
300
+ result_precision = print_zone_metrics("Precision", list_of_precisions)
301
+ result["Zone_Precision"] = result_precision
302
+ result_recall = print_zone_metrics("Recall", list_of_recalls)
+ result["Zone_Recall"] = result_recall
304
+ result_f1 = print_zone_metrics("F1 Score", list_of_f1_scores)
305
+ result["Zone_F1"] = result_f1
306
+ result_iou = print_zone_metrics("IoU", list_of_ious)
307
+ result["Zone_IoU"] = result_iou
308
+ else:
309
+ if len(list_of_precisions) > 0:
310
+ result_precisions = print_front_metric("Precision", list_of_precisions)
+ result["Front_Precision"] = result_precisions
312
+ if len(list_of_recalls) > 0:
313
+ result_recall = print_front_metric("Recall", list_of_recalls)
314
+ result["Front_Recall"] = result_recall
315
+ if len(list_of_f1_scores) > 0:
316
+ result_f1 = print_front_metric("F1 Score", list_of_f1_scores)
317
+ result["Front_F1"] = result_f1
318
+ if len(list_of_ious) > 0:
319
+ result_iou = print_front_metric("IoU", list_of_ious)
320
+ result["Front_IoU"] = result_iou
321
+
322
+ return result
323
+
324
+ def check_whether_winter_half_year(name):
325
+ split_name = name[:-4].split('_')
326
+ if split_name[0] == "COL" or split_name[0] == "JAC":
327
+ nord_halbkugel = True
328
+ else: # Jorum, Maple, Crane, SI, DBE
329
+ nord_halbkugel = False
330
+ month = int(split_name[1].split('-')[1])
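+ # months April-August count as the summer half-year on the northern hemisphere and as the
+ # winter half-year on the southern hemisphere (COL and JAC are the northern-hemisphere glaciers)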
331
+ if nord_halbkugel:
332
+ if month < 4 or month > 8:
333
+ winter = True
334
+ else:
335
+ winter = False
336
+ else:
337
+ if month < 4 or month > 8:
338
+ winter = False
339
+ else:
340
+ winter = True
341
+ return winter
342
+
343
+
344
+ def front_delineation_metric(modality, complete_postprocessed_test_directory, directory_of_target_fronts, bounding_boxes_directory):
345
+ print("Calculating distance errors ...\n\n")
346
+ if modality == 'front':
347
+ post_processed_predicted_masks = list(file for file in os.listdir(complete_postprocessed_test_directory) if file.endswith('_front.png'))
348
+
349
+ elif modality == 'zone':
350
+ post_processed_predicted_masks = list(file for file in os.listdir(complete_postprocessed_test_directory))
351
+
352
+ print("")
353
+ print("####################################################################")
354
+ print(f"# Results for all images")
355
+ print("####################################################################")
356
+ fig = px.box(None, points="all", template="plotly_white", log_x=True, height=300)
357
+ G10 = px.colors.qualitative.G10
358
+ width = 0.5
359
+ list_of_mean_front_errors_without_nan, result_all = calculate_front_delineation_metric(complete_postprocessed_test_directory, post_processed_predicted_masks, directory_of_target_fronts, bounding_boxes_directory)
360
+ np.savetxt(os.path.join(complete_postprocessed_test_directory, os.pardir, "distance_errors.txt"), list_of_mean_front_errors_without_nan)
361
+ fig.add_trace(go.Box(x=list_of_mean_front_errors_without_nan, marker_color='orange', boxmean=True, boxpoints='all', name='all', width=width))
362
+
363
+ results = {}
364
+ results['Result_all'] = result_all
365
+
366
+ # Season subsetting
367
+ for season in ["winter", "summer"]:
368
+ print("")
369
+ print("####################################################################")
370
+ print(f"# Results for only images in {season}")
371
+ print("####################################################################")
372
+ subset_of_predictions = []
373
+ for file_name in post_processed_predicted_masks:
374
+ winter = check_whether_winter_half_year(file_name)
375
+ if (winter and season == "summer") or (not winter and season == "winter"):
376
+ continue
377
+ subset_of_predictions.append(file_name)
378
+ if len(subset_of_predictions) == 0: continue
379
+ all_errors, result_season = calculate_front_delineation_metric(complete_postprocessed_test_directory, subset_of_predictions, directory_of_target_fronts, bounding_boxes_directory)
380
+ if season == 'winter':
381
+ color = G10[9]
382
+ else:
383
+ color = G10[8]
384
+ print(season, np.mean(all_errors), np.std(all_errors))
385
+ fig.add_trace(go.Box(x=all_errors, marker_color=color, boxmean=True, boxpoints='all', name=season, width=width, legendrank=0))
386
+
387
+ results[season] = result_season
388
+ fig.update_layout(showlegend=False, font=dict(family="Times New Roma", size=12))
389
+ fig.update_xaxes(title='front delineation error (m)')
390
+ fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':['summer','winter','all']})
391
+ fig.update_traces(orientation='h') # horizontal box plots
392
+ fig.write_image("create_plots_new/output/error_season.pdf", format='pdf')
393
+
394
+
395
+ # Glacier subsetting
396
+ fig = px.box(None, points="all", template="plotly_white", log_x=True, height=300)
397
+ fig.add_trace(go.Box(x=list_of_mean_front_errors_without_nan, marker_color='orange', boxmean=True, boxpoints='all', name='all', width=width,legendrank=7))
398
+ color = {'COL': G10[3], 'Mapple': G10[4]}
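+ # only Mapple and COL are given colors here; the loop below would raise a KeyError for the other
+ # glaciers, so the test set presumably contains only these two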
399
+ for glacier in ["Mapple", "COL", "Crane", "DBE", "JAC", "Jorum", "SI"]:
400
+ print("")
401
+ print("####################################################################")
402
+ print(f"# Results for only images from {glacier}")
403
+ print("####################################################################")
404
+ subset_of_predictions = []
405
+ for file_name in post_processed_predicted_masks:
406
+ if not file_name[:-4].split('_')[0] == glacier:
407
+ continue
408
+ subset_of_predictions.append(file_name)
409
+ if len(subset_of_predictions) == 0: continue
410
+ all_errors, result_glacier = calculate_front_delineation_metric(complete_postprocessed_test_directory,subset_of_predictions, directory_of_target_fronts, bounding_boxes_directory)
411
+ print(glacier, np.mean(all_errors), np.std(all_errors))
412
+ fig.add_trace(
413
+ go.Box(x=all_errors, marker_color=color[glacier], boxmean=True, boxpoints='all', name=glacier, width=width))
414
+ results[glacier] = {}
415
+ results[glacier]['all'] = result_glacier
416
+ fig.update_layout(showlegend=False, font=dict(family="Times New Roma", size=12))
417
+ fig.update_xaxes(title='front delineation error (m)')
418
+ fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':['Mapple', 'COL', 'all']})
419
+ fig.update_traces(orientation='h') # horizontal box plots
420
+ fig.write_image("create_plots_new/output/error_glacier.pdf", format='pdf')
421
+
422
+ color = {'ERS': G10[9], 'RSAT': G10[1], 'ENVISAT': G10[8], 'PALSAR':G10[3], 'TSX':G10[4], 'TDX':G10[5], 'S1':G10[6]}
423
+ # Sensor subsetting
424
+ fig = px.box(None, points="all", template="plotly_white", log_x=True, height=500)
425
+ fig.add_trace(go.Box(x=list_of_mean_front_errors_without_nan, marker_color='orange', boxmean=True, boxpoints='all', name='all', width=width))
426
+
427
+ for sensor in ["RSAT", "S1", "ENVISAT", "ERS", "PALSAR", "TSX", "TDX"]:
428
+ print("")
429
+ print("####################################################################")
430
+ print(f"# Results for only images from {sensor}")
431
+ print("####################################################################")
432
+ subset_of_predictions = []
433
+ for file_name in post_processed_predicted_masks:
434
+ if not file_name[:-4].split('_')[2] == sensor:
435
+ continue
436
+ subset_of_predictions.append(file_name)
437
+ if len(subset_of_predictions) == 0: continue
438
+ all_errors, result_sensor = calculate_front_delineation_metric(complete_postprocessed_test_directory,subset_of_predictions, directory_of_target_fronts, bounding_boxes_directory)
439
+ print(sensor, np.mean(all_errors), np.std(all_errors))
440
+ fig.add_trace(
441
+ go.Box(x=all_errors, marker_color=color[sensor], boxmean=True, boxpoints='all', name=sensor, width=width))
442
+ results[sensor] = result_sensor
443
+ fig.update_layout(showlegend=False, font=dict(family="Times New Roma", size=12))
444
+ fig.update_xaxes(title='front delineation error (m)')
445
+ fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':[ 'S1','TDX','TSX','PALSAR', 'ENVISAT', 'ERS','all']})
446
+ fig.update_traces(orientation='h') # horizontal box plots
447
+ fig.write_image("create_plots_new/output/error_satellite.pdf", format='pdf')
448
+ # Resolution subsetting
450
+ fig = px.box(None, points="all", template="plotly_white", log_x=True)
451
+ fig.add_trace(go.Box(x=list_of_mean_front_errors_without_nan, marker_color='orange', boxmean=True, boxpoints='all', name='all', width=width))
452
+ color ={20: G10[9], 17:G10[8], 7:G10[3]}
453
+ for res in [20, 17, 7]:
454
+ print("")
455
+ print("####################################################################")
456
+ print(f"# Results for only images with a resolution of {res}")
457
+ print("####################################################################")
458
+ subset_of_predictions = []
459
+ for file_name in post_processed_predicted_masks:
460
+ if not int(file_name[:-4].split('_')[3]) == res:
461
+ continue
462
+ subset_of_predictions.append(file_name)
463
+ if len(subset_of_predictions) == 0: continue
464
+ all_errors, result_res = calculate_front_delineation_metric(complete_postprocessed_test_directory,subset_of_predictions, directory_of_target_fronts, bounding_boxes_directory)
465
+ fig.add_trace(
466
+ go.Box(x=all_errors, marker_color=color[res], boxmean=True, boxpoints='all', name=str(res), width=width))  # str() so the trace name matches the string categoryarray below
467
+ results[res] = result_res
468
+ fig.update_layout(showlegend=False, font=dict(family="Times New Roma", size=12))
469
+ fig.update_xaxes(title='front delineation error (m)')
470
+ fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':['7', '17', '20','all']})
471
+ fig.update_traces(orientation='h') # horizontal box plots
472
+ fig.write_image("create_plots_new/output/error_resolution.pdf", format='pdf')
473
+
474
+ # Season and glacier subsetting
475
+ for glacier in ["Mapple", "COL", "Crane", "DBE", "JAC", "Jorum", "SI"]:
476
+ for season in ["winter", "summer"]:
477
+ print("")
478
+ print("####################################################################")
479
+ print(f"# Results for only images in {season} and from {glacier}")
480
+ print("####################################################################")
481
+ subset_of_predictions = []
482
+ for file_name in post_processed_predicted_masks:
483
+ winter = check_whether_winter_half_year(file_name)
484
+ if not file_name[:-4].split('_')[0] == glacier:
485
+ continue
486
+ if (winter and season == "summer") or (not winter and season == "winter"):
487
+ continue
488
+ subset_of_predictions.append(file_name)
489
+ if len(subset_of_predictions) == 0: continue
490
+ _, results_gla_season = calculate_front_delineation_metric(complete_postprocessed_test_directory, subset_of_predictions, directory_of_target_fronts, bounding_boxes_directory)
491
+ results[glacier][season] = results_gla_season
492
+
493
+ return results
494
+
495
+ def visualizations(complete_postprocessed_test_directory, directory_of_target_fronts, directory_of_sar_images,
496
+ bounding_boxes_directory, visualizations_dir):
497
+ print("Creating visualizations ...\n\n")
498
+ post_processed_predicted_masks = os.listdir(os.path.join(complete_postprocessed_test_directory))
499
+ for file_name in post_processed_predicted_masks:
500
+ if not file_name.endswith('.png'):
501
+ continue
502
+ resolution = int(os.path.split(file_name)[1][:-4].split('_')[-3])
503
+ if resolution < 10:
504
+ dilation = 5
505
+ else:
506
+ dilation = 3
507
+
508
+ if file_name.endswith('_front.png'):
509
+ post_processed_predicted_mask = cv2.imread(os.path.join(complete_postprocessed_test_directory, file_name).__str__(), cv2.IMREAD_GRAYSCALE)
510
+ post_processed_predicted_mask = mask_prediction_with_bounding_box(post_processed_predicted_mask, file_name[:-len('_front.png')]+'.png', bounding_boxes_directory)
511
+ post_processed_predicted_mask[post_processed_predicted_mask > 1] = 1
512
+ post_processed_predicted_mask_skeletonized = skeletonize(post_processed_predicted_mask)
513
+ post_processed_predicted_mask = np.zeros(post_processed_predicted_mask_skeletonized.shape)
514
+ post_processed_predicted_mask[post_processed_predicted_mask_skeletonized] = 255
515
+ matching_target_file = get_matching_out_of_folder(file_name[:-len('_front.png')]+'.png', directory_of_target_fronts)
516
+ target_front = cv2.imread(os.path.join(directory_of_target_fronts, matching_target_file).__str__(), cv2.IMREAD_GRAYSCALE)
517
+ matching_sar_file = get_matching_out_of_folder(file_name[:-len('_front.png')]+'.png', directory_of_sar_images)
518
+ sar_image = cv2.imread(os.path.join(directory_of_sar_images, matching_sar_file).__str__(), cv2.IMREAD_GRAYSCALE)
519
+ elif file_name.endswith('_zone.png'):
520
+ continue
521
+ elif file_name.endswith('_recon.png'):
522
+ continue
523
+ else:
524
+ post_processed_predicted_mask = cv2.imread(
525
+ os.path.join(complete_postprocessed_test_directory, file_name).__str__(), cv2.IMREAD_GRAYSCALE)
526
+ matching_target_file = get_matching_out_of_folder(file_name, directory_of_target_fronts)
527
+ target_front = cv2.imread(os.path.join(directory_of_target_fronts, matching_target_file).__str__(),cv2.IMREAD_GRAYSCALE)
528
+ matching_sar_file = get_matching_out_of_folder(file_name, directory_of_sar_images)
529
+ sar_image = cv2.imread(os.path.join(directory_of_sar_images, matching_sar_file).__str__(),cv2.IMREAD_GRAYSCALE)
530
+
531
+
532
+ predicted_front = np.array(post_processed_predicted_mask)
533
+ ground_truth_front = np.array(target_front)
534
+ kernel = np.ones((dilation, dilation), np.uint8)
535
+ predicted_front = cv2.dilate(predicted_front, kernel, iterations=1)
536
+ ground_truth_front = cv2.dilate(ground_truth_front, kernel, iterations=1)
537
+
538
+ sar_image = np.array(sar_image)
539
+ sar_image_rgb = skimage.color.gray2rgb(sar_image)
540
+ sar_image_rgb = np.uint8(sar_image_rgb)
541
+
542
+ sar_image_rgb[predicted_front > 0] = [0, 255, 255] # b, g, r
543
+ sar_image_rgb[ground_truth_front > 0] = [255, 51, 51]
544
+ correct_prediction = np.logical_and(predicted_front, ground_truth_front)
545
+ sar_image_rgb[correct_prediction > 0] = [255, 0, 255] # [51, 255, 51] # [0, 153, 0]
546
+
547
+ # Insert Bounding Box
548
+ matching_bounding_box_file = get_matching_out_of_folder(file_name, bounding_boxes_directory)
549
+ with open(os.path.join(bounding_boxes_directory, matching_bounding_box_file)) as f:
550
+ coord_file_lines = f.readlines()
551
+ left_upper_corner_x, left_upper_corner_y = [round(float(coord)) for coord in coord_file_lines[1].split(",")]
552
+ left_lower_corner_x, left_lower_corner_y = [round(float(coord)) for coord in coord_file_lines[2].split(",")]
553
+ right_lower_corner_x, right_lower_corner_y = [round(float(coord)) for coord in coord_file_lines[3].split(",")]
554
+ right_upper_corner_x, right_upper_corner_y = [round(float(coord)) for coord in coord_file_lines[4].split(",")]
555
+
556
+ bounding_box = np.zeros((len(sar_image_rgb), len(sar_image_rgb[0])))
557
+ if left_upper_corner_x < 0: left_upper_corner_x = 0
558
+ if left_lower_corner_x < 0: left_lower_corner_x = 0
559
+ if right_upper_corner_x > len(sar_image_rgb[0]): right_upper_corner_x = len(sar_image_rgb[0]) - 1
560
+ if right_lower_corner_x > len(sar_image_rgb[0]): right_lower_corner_x = len(sar_image_rgb[0]) - 1
561
+ if left_upper_corner_y > len(sar_image_rgb): left_upper_corner_y = len(sar_image_rgb) - 1
562
+ if left_lower_corner_y < 0: left_lower_corner_y = 0
563
+ if right_upper_corner_y > len(sar_image_rgb): right_upper_corner_y = len(sar_image_rgb) - 1
564
+ if right_lower_corner_y < 0: right_lower_corner_y = 0
565
+
566
+ bounding_box[left_upper_corner_y, left_upper_corner_x:right_upper_corner_x] = 1
567
+ bounding_box[left_lower_corner_y, left_lower_corner_x:right_lower_corner_x] = 1
568
+ bounding_box[left_lower_corner_y:left_upper_corner_y, left_upper_corner_x] = 1
569
+ bounding_box[right_lower_corner_y:right_upper_corner_y, right_lower_corner_x] = 1
570
+ bounding_box = cv2.dilate(bounding_box, kernel, iterations=1)
571
+ sar_image_rgb[bounding_box > 0] = [255, 255, 0]
572
+
573
+ cv2.imwrite(os.path.join(visualizations_dir, file_name), sar_image_rgb)
574
+
575
+ def main(complete_test_directory, directory_of_complete_targets_zones, directory_of_complete_targets_fronts, directory_of_sar_images):
576
+ # ###############################################################################################
577
+ # CALCULATE SEGMENTATION METRICS (IoU & Hausdorff Distance)
578
+ # ###############################################################################################
579
+ complete_predicted_masks_zones = list(file for file in os.listdir(complete_test_directory) if file.endswith('_zone.png'))
580
+ complete_predicted_masks_fronts = list(file for file in os.listdir(complete_test_directory) if file.endswith('_front.png'))
581
+ src = Path(directory_of_sar_images).parent.parent.parent
582
+ bounding_boxes_directory = os.path.join(src, "data_raw", "bounding_boxes")
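+ # the bounding boxes are expected three directory levels above the SAR images, under data_raw/bounding_boxes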
583
+ results = {}
584
+ # only on zone
585
+
586
+ if len(complete_predicted_masks_zones) > 0:
587
+ results_seg = calculate_segmentation_metrics('zones', complete_predicted_masks_zones, complete_test_directory,
588
+ directory_of_complete_targets_zones,)
589
+ results['Zone_Segmentation'] = results_seg
590
+
591
+ if len(complete_predicted_masks_fronts) > 0:
592
+ results_seg = calculate_segmentation_metrics('fronts', complete_predicted_masks_fronts,
593
+ complete_test_directory,
594
+ directory_of_complete_targets_fronts, )
595
+ results['Front_Segmentation'] = results_seg
596
+
597
+ # ###############################################################################################
598
+ # POST-PROCESSING
599
+ # ###############################################################################################
600
+
603
+
604
+ if len(complete_predicted_masks_zones) > 0:
605
+ post_processing('zones', complete_predicted_masks_zones, bounding_boxes_directory, complete_test_directory)
606
+
607
+ # ###############################################################################################
608
+ # CALCULATE FRONT DELINEATION METRIC (Mean distance error)
609
+ # ###############################################################################################
610
+
611
+ if len(complete_predicted_masks_zones) > 0:
612
+ print("Front delineation from ZONE post processed")
613
+ results_zone = front_delineation_metric('zone', complete_postprocessed_test_directory, directory_of_complete_targets_fronts, bounding_boxes_directory)
614
+ results['Zone_Delineation'] = results_zone
615
+
616
+
617
+ if len(complete_predicted_masks_fronts) > 0:
618
+ print("Front delineation from FRONT directly")
619
+ results_front = front_delineation_metric('front', complete_test_directory, directory_of_complete_targets_fronts, bounding_boxes_directory)
620
+ results['Front_Delineation'] = results_front
621
+
622
+ with open(os.path.join(complete_test_directory, 'eval_results.json'), "w") as results_file:
+ json.dump(results, results_file)
624
+
625
+ # ###############################################################################################
626
+ # MAKE VISUALIZATIONS
627
+ # ###############################################################################################
628
+ if len(complete_predicted_masks_zones) > 0:
629
+ visualizations(complete_postprocessed_test_directory, directory_of_complete_targets_fronts, directory_of_sar_images,
630
+ bounding_boxes_directory, visualizations_dir)
631
+
632
+ if len(complete_predicted_masks_fronts) > 0:
633
+ front_prediction_dir = complete_test_directory
634
+
635
+ visualizations(front_prediction_dir, directory_of_complete_targets_fronts, directory_of_sar_images,
636
+ bounding_boxes_directory, visualizations_dir)
637
+
638
+
639
+ if __name__ == "__main__":
640
+ print("Start Evaluation")
641
+ parser = ArgumentParser(add_help=False)
642
+ parser.add_argument('--predictions', help="Directory with predictions as png")
+ parser.add_argument('--labels_fronts', help="Directory with front labels as png")
+ parser.add_argument('--labels_zones', help="Directory with zone labels as png")
+ parser.add_argument('--sar_images', help="Directory with SAR images")
646
+ hparams = parser.parse_args()
647
+
648
+ complete_test_directory = hparams.predictions
649
+ complete_postprocessed_test_directory = os.path.join(complete_test_directory, "postprocessed")
650
+
651
+ os.makedirs(complete_postprocessed_test_directory, exist_ok=True)
652
+
653
+ visualizations_dir = os.path.join(complete_test_directory, "visualization")
654
+ os.makedirs(visualizations_dir, exist_ok=True)
655
+
656
+ main(hparams.predictions, hparams.labels_zones, hparams.labels_fronts, hparams.sar_images)
nnunet/__init__.py ADDED
@@ -0,0 +1,7 @@
+ from __future__ import absolute_import
+ print("\n\nPlease cite the following paper when using nnUNet:\n\nIsensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "
+ "\"nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation.\" "
+ "Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z\n\n")
+ print("If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet\n")
+
+ from . import *
nnunet/configuration.py ADDED
@@ -0,0 +1,5 @@
+ import os
+
+ default_num_threads = 8 if 'nnUNet_def_n_proc' not in os.environ else int(os.environ['nnUNet_def_n_proc'])
+ RESAMPLING_SEPARATE_Z_ANISO_THRESHOLD = 3 # determines what threshold to use for resampling the low resolution axis
+ # separately (with NN)
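+ # e.g. launching as `nnUNet_def_n_proc=16 nnUNet_train ...` would use 16 processes (illustrative invocation)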
nnunet/dataset_conversion/Task017_BeyondCranialVaultAbdominalOrganSegmentation.py ADDED
@@ -0,0 +1,94 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ from nnunet.paths import nnUNet_raw_data
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+
21
+
22
+ if __name__ == "__main__":
23
+ base = "/media/yunlu/10TB/research/other_data/Multi-Atlas Labeling Beyond the Cranial Vault/RawData/"
24
+
25
+ task_id = 17
26
+ task_name = "AbdominalOrganSegmentation"
27
+ prefix = 'ABD'
28
+
29
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
30
+
31
+ out_base = join(nnUNet_raw_data, foldername)
32
+ imagestr = join(out_base, "imagesTr")
33
+ imagests = join(out_base, "imagesTs")
34
+ labelstr = join(out_base, "labelsTr")
35
+ maybe_mkdir_p(imagestr)
36
+ maybe_mkdir_p(imagests)
37
+ maybe_mkdir_p(labelstr)
38
+
39
+ train_folder = join(base, "Training/img")
40
+ label_folder = join(base, "Training/label")
41
+ test_folder = join(base, "Test/img")
42
+ train_patient_names = []
43
+ test_patient_names = []
44
+ train_patients = subfiles(train_folder, join=False, suffix='nii.gz')
45
+ for p in train_patients:
46
+ serial_number = int(p[3:7])
47
+ train_patient_name = f'{prefix}_{serial_number:03d}.nii.gz'
48
+ label_file = join(label_folder, f'label{p[3:]}')
49
+ image_file = join(train_folder, p)
50
+ shutil.copy(image_file, join(imagestr, f'{train_patient_name[:7]}_0000.nii.gz'))
51
+ shutil.copy(label_file, join(labelstr, train_patient_name))
52
+ train_patient_names.append(train_patient_name)
53
+
54
+ test_patients = subfiles(test_folder, join=False, suffix=".nii.gz")
55
+ for p in test_patients:
56
+ p = p[:-7]
57
+ image_file = join(test_folder, p + ".nii.gz")
58
+ serial_number = int(p[3:7])
59
+ test_patient_name = f'{prefix}_{serial_number:03d}.nii.gz'
60
+ shutil.copy(image_file, join(imagests, f'{test_patient_name[:7]}_0000.nii.gz'))
61
+ test_patient_names.append(test_patient_name)
62
+
63
+ json_dict = OrderedDict()
64
+ json_dict['name'] = "AbdominalOrganSegmentation"
65
+ json_dict['description'] = "Multi-Atlas Labeling Beyond the Cranial Vault Abdominal Organ Segmentation"
66
+ json_dict['tensorImageSize'] = "3D"
67
+ json_dict['reference'] = "https://www.synapse.org/#!Synapse:syn3193805/wiki/217789"
68
+ json_dict['licence'] = "see challenge website"
69
+ json_dict['release'] = "0.0"
70
+ json_dict['modality'] = {
71
+ "0": "CT",
72
+ }
73
+ json_dict['labels'] = OrderedDict({
74
+ "00": "background",
75
+ "01": "spleen",
76
+ "02": "right kidney",
77
+ "03": "left kidney",
78
+ "04": "gallbladder",
79
+ "05": "esophagus",
80
+ "06": "liver",
81
+ "07": "stomach",
82
+ "08": "aorta",
83
+ "09": "inferior vena cava",
84
+ "10": "portal vein and splenic vein",
85
+ "11": "pancreas",
86
+ "12": "right adrenal gland",
87
+ "13": "left adrenal gland"}
88
+ )
89
+ json_dict['numTraining'] = len(train_patient_names)
90
+ json_dict['numTest'] = len(test_patient_names)
91
+ json_dict['training'] = [{'image': "./imagesTr/%s" % train_patient_name, "label": "./labelsTr/%s" % train_patient_name} for i, train_patient_name in enumerate(train_patient_names)]
92
+ json_dict['test'] = ["./imagesTs/%s" % test_patient_name for test_patient_name in test_patient_names]
93
+
94
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task024_Promise2012.py ADDED
@@ -0,0 +1,81 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from collections import OrderedDict
15
+ import SimpleITK as sitk
16
+ from batchgenerators.utilities.file_and_folder_operations import *
17
+
18
+
19
+ def export_for_submission(source_dir, target_dir):
20
+ """
21
+ promise wants mhd :-/
22
+ :param source_dir:
23
+ :param target_dir:
24
+ :return:
25
+ """
26
+ files = subfiles(source_dir, suffix=".nii.gz", join=False)
27
+ target_files = [join(target_dir, i[:-7] + ".mhd") for i in files]
28
+ maybe_mkdir_p(target_dir)
29
+ for f, t in zip(files, target_files):
30
+ img = sitk.ReadImage(join(source_dir, f))
31
+ sitk.WriteImage(img, t)
32
+
33
+
34
+ if __name__ == "__main__":
35
+ folder = "/media/fabian/My Book/datasets/promise2012"
36
+ out_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task024_Promise"
37
+
38
+ maybe_mkdir_p(join(out_folder, "imagesTr"))
39
+ maybe_mkdir_p(join(out_folder, "imagesTs"))
40
+ maybe_mkdir_p(join(out_folder, "labelsTr"))
41
+ # train
42
+ current_dir = join(folder, "train")
43
+ segmentations = subfiles(current_dir, suffix="segmentation.mhd")
44
+ raw_data = [i for i in subfiles(current_dir, suffix="mhd") if not i.endswith("segmentation.mhd")]
45
+ for i in raw_data:
46
+ out_fname = join(out_folder, "imagesTr", i.split("/")[-1][:-4] + "_0000.nii.gz")
47
+ sitk.WriteImage(sitk.ReadImage(i), out_fname)
48
+ for i in segmentations:
49
+ out_fname = join(out_folder, "labelsTr", i.split("/")[-1][:-17] + ".nii.gz")
50
+ sitk.WriteImage(sitk.ReadImage(i), out_fname)
51
+
52
+ # test
53
+ current_dir = join(folder, "test")
54
+ test_data = subfiles(current_dir, suffix="mhd")
55
+ for i in test_data:
56
+ out_fname = join(out_folder, "imagesTs", i.split("/")[-1][:-4] + "_0000.nii.gz")
57
+ sitk.WriteImage(sitk.ReadImage(i), out_fname)
58
+
59
+
60
+ json_dict = OrderedDict()
61
+ json_dict['name'] = "PROMISE12"
62
+ json_dict['description'] = "prostate"
63
+ json_dict['tensorImageSize'] = "4D"
64
+ json_dict['reference'] = "see challenge website"
65
+ json_dict['licence'] = "see challenge website"
66
+ json_dict['release'] = "0.0"
67
+ json_dict['modality'] = {
68
+ "0": "MRI",
69
+ }
70
+ json_dict['labels'] = {
71
+ "0": "background",
72
+ "1": "prostate"
73
+ }
74
+ json_dict['numTraining'] = len(raw_data)
75
+ json_dict['numTest'] = len(test_data)
76
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1][:-4], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1][:-4]} for i in
77
+ raw_data]
78
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1][:-4] for i in test_data]
79
+
80
+ save_json(json_dict, os.path.join(out_folder, "dataset.json"))
81
+
nnunet/dataset_conversion/Task027_AutomaticCardiacDetectionChallenge.py ADDED
@@ -0,0 +1,106 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections import OrderedDict
16
+ from batchgenerators.utilities.file_and_folder_operations import *
17
+ import shutil
18
+ import numpy as np
19
+ from sklearn.model_selection import KFold
20
+
21
+
22
+ def convert_to_submission(source_dir, target_dir):
23
+ niftis = subfiles(source_dir, join=False, suffix=".nii.gz")
24
+ patientids = np.unique([i[:10] for i in niftis])
25
+ maybe_mkdir_p(target_dir)
26
+ for p in patientids:
27
+ files_of_that_patient = subfiles(source_dir, prefix=p, suffix=".nii.gz", join=False)
28
+ assert len(files_of_that_patient)
29
+ files_of_that_patient.sort()
30
+ # first is ED, second is ES
31
+ shutil.copy(join(source_dir, files_of_that_patient[0]), join(target_dir, p + "_ED.nii.gz"))
32
+ shutil.copy(join(source_dir, files_of_that_patient[1]), join(target_dir, p + "_ES.nii.gz"))
33
+
34
+
35
+ if __name__ == "__main__":
36
+ folder = "/media/fabian/My Book/datasets/ACDC/training"
37
+ folder_test = "/media/fabian/My Book/datasets/ACDC/testing/testing"
38
+ out_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task027_ACDC"
39
+
40
+ maybe_mkdir_p(join(out_folder, "imagesTr"))
41
+ maybe_mkdir_p(join(out_folder, "imagesTs"))
42
+ maybe_mkdir_p(join(out_folder, "labelsTr"))
43
+
44
+ # train
45
+ all_train_files = []
46
+ patient_dirs_train = subfolders(folder, prefix="patient")
47
+ for p in patient_dirs_train:
48
+ current_dir = p
49
+ data_files_train = [i for i in subfiles(current_dir, suffix=".nii.gz") if i.find("_gt") == -1 and i.find("_4d") == -1]
50
+ corresponding_seg_files = [i[:-7] + "_gt.nii.gz" for i in data_files_train]
51
+ for d, s in zip(data_files_train, corresponding_seg_files):
52
+ patient_identifier = d.split("/")[-1][:-7]
53
+ all_train_files.append(patient_identifier + "_0000.nii.gz")
54
+ shutil.copy(d, join(out_folder, "imagesTr", patient_identifier + "_0000.nii.gz"))
55
+ shutil.copy(s, join(out_folder, "labelsTr", patient_identifier + ".nii.gz"))
56
+
57
+ # test
58
+ all_test_files = []
59
+ patient_dirs_test = subfolders(folder_test, prefix="patient")
60
+ for p in patient_dirs_test:
61
+ current_dir = p
62
+ data_files_test = [i for i in subfiles(current_dir, suffix=".nii.gz") if i.find("_gt") == -1 and i.find("_4d") == -1]
63
+ for d in data_files_test:
64
+ patient_identifier = d.split("/")[-1][:-7]
65
+ all_test_files.append(patient_identifier + "_0000.nii.gz")
66
+ shutil.copy(d, join(out_folder, "imagesTs", patient_identifier + "_0000.nii.gz"))
67
+
68
+
69
+ json_dict = OrderedDict()
70
+ json_dict['name'] = "ACDC"
71
+ json_dict['description'] = "cardias cine MRI segmentation"
72
+ json_dict['tensorImageSize'] = "4D"
73
+ json_dict['reference'] = "see ACDC challenge"
74
+ json_dict['licence'] = "see ACDC challenge"
75
+ json_dict['release'] = "0.0"
76
+ json_dict['modality'] = {
77
+ "0": "MRI",
78
+ }
79
+ json_dict['labels'] = {
80
+ "0": "background",
81
+ "1": "RV",
82
+ "2": "MLV",
83
+ "3": "LVC"
84
+ }
85
+ json_dict['numTraining'] = len(all_train_files)
86
+ json_dict['numTest'] = len(all_test_files)
87
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1][:-12], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1][:-12]} for i in
88
+ all_train_files]
89
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1][:-12] for i in all_test_files]
90
+
91
+ save_json(json_dict, os.path.join(out_folder, "dataset.json"))
92
+
93
+ # create a dummy split (patients need to be separated)
94
+ splits = []
95
+ patients = np.unique([i[:10] for i in all_train_files])
96
+ patientids = [i[:-12] for i in all_train_files]
97
+
98
+ kf = KFold(n_splits=5, shuffle=True, random_state=12345)
99
+ for tr, val in kf.split(patients):
100
+ splits.append(OrderedDict())
101
+ tr_patients = patients[tr]
102
+ splits[-1]['train'] = [i[:-12] for i in all_train_files if i[:10] in tr_patients]
103
+ val_patients = patients[val]
104
+ splits[-1]['val'] = [i[:-12] for i in all_train_files if i[:10] in val_patients]
105
+
106
+ save_pickle(splits, "/media/fabian/nnunet/Task027_ACDC/splits_final.pkl")
nnunet/dataset_conversion/Task029_LiverTumorSegmentationChallenge.py ADDED
@@ -0,0 +1,123 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections import OrderedDict
16
+ import SimpleITK as sitk
17
+ from batchgenerators.utilities.file_and_folder_operations import *
18
+ from multiprocessing import Pool
19
+ import numpy as np
20
+ from nnunet.configuration import default_num_threads
21
+ from scipy.ndimage import label
22
+
23
+
24
+ def export_segmentations(indir, outdir):
25
+ niftis = subfiles(indir, suffix='nii.gz', join=False)
26
+ for n in niftis:
27
+ identifier = str(n.split("_")[-1][:-7])
28
+ outfname = join(outdir, "test-segmentation-%s.nii" % identifier)
29
+ img = sitk.ReadImage(join(indir, n))
30
+ sitk.WriteImage(img, outfname)
31
+
32
+
33
+ def export_segmentations_postprocess(indir, outdir):
34
+ maybe_mkdir_p(outdir)
35
+ niftis = subfiles(indir, suffix='nii.gz', join=False)
36
+ for n in niftis:
37
+ print("\n", n)
38
+ identifier = str(n.split("_")[-1][:-7])
39
+ outfname = join(outdir, "test-segmentation-%s.nii" % identifier)
40
+ img = sitk.ReadImage(join(indir, n))
41
+ img_npy = sitk.GetArrayFromImage(img)
42
+ lmap, num_objects = label((img_npy > 0).astype(int))
43
+ sizes = []
44
+ for o in range(1, num_objects + 1):
45
+ sizes.append((lmap == o).sum())
46
+ mx = np.argmax(sizes) + 1
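+ # keep only the largest connected component; component labels start at 1, hence the + 1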
47
+ print(sizes)
48
+ img_npy[lmap != mx] = 0
49
+ img_new = sitk.GetImageFromArray(img_npy)
50
+ img_new.CopyInformation(img)
51
+ sitk.WriteImage(img_new, outfname)
52
+
53
+
54
+ if __name__ == "__main__":
55
+ train_dir = "/media/fabian/DeepLearningData/tmp/LITS-Challenge-Train-Data"
56
+ test_dir = "/media/fabian/My Book/datasets/LiTS/test_data"
57
+
58
+
59
+ output_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task029_LITS"
60
+ img_dir = join(output_folder, "imagesTr")
61
+ lab_dir = join(output_folder, "labelsTr")
62
+ img_dir_te = join(output_folder, "imagesTs")
63
+ maybe_mkdir_p(img_dir)
64
+ maybe_mkdir_p(lab_dir)
65
+ maybe_mkdir_p(img_dir_te)
66
+
67
+
68
+ def load_save_train(args):
69
+ data_file, seg_file = args
70
+ pat_id = data_file.split("/")[-1]
71
+ pat_id = "train_" + pat_id.split("-")[-1][:-4]
72
+
73
+ img_itk = sitk.ReadImage(data_file)
74
+ sitk.WriteImage(img_itk, join(img_dir, pat_id + "_0000.nii.gz"))
75
+
76
+ img_itk = sitk.ReadImage(seg_file)
77
+ sitk.WriteImage(img_itk, join(lab_dir, pat_id + ".nii.gz"))
78
+ return pat_id
79
+
80
+ def load_save_test(args):
81
+ data_file = args
82
+ pat_id = data_file.split("/")[-1]
83
+ pat_id = "test_" + pat_id.split("-")[-1][:-4]
84
+
85
+ img_itk = sitk.ReadImage(data_file)
86
+ sitk.WriteImage(img_itk, join(img_dir_te, pat_id + "_0000.nii.gz"))
87
+ return pat_id
88
+
89
+ nii_files_tr_data = subfiles(train_dir, True, "volume", "nii", True)
90
+ nii_files_tr_seg = subfiles(train_dir, True, "segmen", "nii", True)
91
+
92
+ nii_files_ts = subfiles(test_dir, True, "test-volume", "nii", True)
93
+
94
+ p = Pool(default_num_threads)
95
+ train_ids = p.map(load_save_train, zip(nii_files_tr_data, nii_files_tr_seg))
96
+ test_ids = p.map(load_save_test, nii_files_ts)
97
+ p.close()
98
+ p.join()
99
+
100
+ json_dict = OrderedDict()
101
+ json_dict['name'] = "LITS"
102
+ json_dict['description'] = "LITS"
103
+ json_dict['tensorImageSize'] = "4D"
104
+ json_dict['reference'] = "see challenge website"
105
+ json_dict['licence'] = "see challenge website"
106
+ json_dict['release'] = "0.0"
107
+ json_dict['modality'] = {
108
+ "0": "CT"
109
+ }
110
+
111
+ json_dict['labels'] = {
112
+ "0": "background",
113
+ "1": "liver",
114
+ "2": "tumor"
115
+ }
116
+
117
+ json_dict['numTraining'] = len(train_ids)
118
+ json_dict['numTest'] = len(test_ids)
119
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in train_ids]
120
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_ids]
121
+
122
+ with open(os.path.join(output_folder, "dataset.json"), 'w') as f:
123
+ json.dump(json_dict, f, indent=4, sort_keys=True)
nnunet/dataset_conversion/Task032_BraTS_2018.py ADDED
@@ -0,0 +1,176 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from multiprocessing.pool import Pool
15
+
16
+ import numpy as np
17
+ from collections import OrderedDict
18
+
19
+ from batchgenerators.utilities.file_and_folder_operations import *
20
+ from nnunet.dataset_conversion.Task043_BraTS_2019 import copy_BraTS_segmentation_and_convert_labels
21
+ from nnunet.paths import nnUNet_raw_data
22
+ import SimpleITK as sitk
23
+ import shutil
24
+
25
+
26
+ def convert_labels_back_to_BraTS(seg: np.ndarray):
27
+ new_seg = np.zeros_like(seg)
28
+ new_seg[seg == 1] = 2
29
+ new_seg[seg == 3] = 4
30
+ new_seg[seg == 2] = 1
31
+ return new_seg
32
+
33
+
34
+ def load_convert_save(filename, input_folder, output_folder):
35
+ a = sitk.ReadImage(join(input_folder, filename))
36
+ b = sitk.GetArrayFromImage(a)
37
+ c = convert_labels_back_to_BraTS(b)
38
+ d = sitk.GetImageFromArray(c)
39
+ d.CopyInformation(a)
40
+ sitk.WriteImage(d, join(output_folder, filename))
41
+
42
+
43
+ def convert_labels_back_to_BraTS_2018_2019_convention(input_folder: str, output_folder: str, num_processes: int = 12):
44
+ """
45
+ reads all prediction files (nifti) in the input folder, converts the labels back to BraTS convention and saves the
46
+ result in output_folder
47
+ :param input_folder:
48
+ :param output_folder:
49
+ :return:
50
+ """
51
+ maybe_mkdir_p(output_folder)
52
+ nii = subfiles(input_folder, suffix='.nii.gz', join=False)
53
+ p = Pool(num_processes)
54
+ p.starmap(load_convert_save, zip(nii, [input_folder] * len(nii), [output_folder] * len(nii)))
55
+ p.close()
56
+ p.join()
57
+
58
+
59
+ if __name__ == "__main__":
60
+ """
61
+ REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
62
+ """
63
+
64
+ task_name = "Task032_BraTS2018"
65
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Training"
66
+
67
+ target_base = join(nnUNet_raw_data, task_name)
68
+ target_imagesTr = join(target_base, "imagesTr")
69
+ target_imagesVal = join(target_base, "imagesVal")
70
+ target_imagesTs = join(target_base, "imagesTs")
71
+ target_labelsTr = join(target_base, "labelsTr")
72
+
73
+ maybe_mkdir_p(target_imagesTr)
74
+ maybe_mkdir_p(target_imagesVal)
75
+ maybe_mkdir_p(target_imagesTs)
76
+ maybe_mkdir_p(target_labelsTr)
77
+
78
+ patient_names = []
79
+ for tpe in ["HGG", "LGG"]:
80
+ cur = join(downloaded_data_dir, tpe)
81
+ for p in subdirs(cur, join=False):
82
+ patdir = join(cur, p)
83
+ patient_name = tpe + "__" + p
84
+ patient_names.append(patient_name)
85
+ t1 = join(patdir, p + "_t1.nii.gz")
86
+ t1c = join(patdir, p + "_t1ce.nii.gz")
87
+ t2 = join(patdir, p + "_t2.nii.gz")
88
+ flair = join(patdir, p + "_flair.nii.gz")
89
+ seg = join(patdir, p + "_seg.nii.gz")
90
+
91
+ assert all([
92
+ isfile(t1),
93
+ isfile(t1c),
94
+ isfile(t2),
95
+ isfile(flair),
96
+ isfile(seg)
97
+ ]), "%s" % patient_name
98
+
99
+ shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
100
+ shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
101
+ shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
102
+ shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))
103
+
104
+ copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))
105
+
106
+ json_dict = OrderedDict()
107
+ json_dict['name'] = "BraTS2018"
108
+ json_dict['description'] = "nothing"
109
+ json_dict['tensorImageSize'] = "4D"
110
+ json_dict['reference'] = "see BraTS2018"
111
+ json_dict['licence'] = "see BraTS2019 license"
112
+ json_dict['release'] = "0.0"
113
+ json_dict['modality'] = {
114
+ "0": "T1",
115
+ "1": "T1ce",
116
+ "2": "T2",
117
+ "3": "FLAIR"
118
+ }
119
+ json_dict['labels'] = {
120
+ "0": "background",
121
+ "1": "edema",
122
+ "2": "non-enhancing",
123
+ "3": "enhancing",
124
+ }
125
+ json_dict['numTraining'] = len(patient_names)
126
+ json_dict['numTest'] = 0
127
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
128
+ patient_names]
129
+ json_dict['test'] = []
130
+
131
+ save_json(json_dict, join(target_base, "dataset.json"))
132
+
133
+ del tpe, cur
134
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Validation"
135
+
136
+ for p in subdirs(downloaded_data_dir, join=False):
137
+ patdir = join(downloaded_data_dir, p)
138
+ patient_name = p
139
+ t1 = join(patdir, p + "_t1.nii.gz")
140
+ t1c = join(patdir, p + "_t1ce.nii.gz")
141
+ t2 = join(patdir, p + "_t2.nii.gz")
142
+ flair = join(patdir, p + "_flair.nii.gz")
143
+
144
+ assert all([
145
+ isfile(t1),
146
+ isfile(t1c),
147
+ isfile(t2),
148
+ isfile(flair),
149
+ ]), "%s" % patient_name
150
+
151
+ shutil.copy(t1, join(target_imagesVal, patient_name + "_0000.nii.gz"))
152
+ shutil.copy(t1c, join(target_imagesVal, patient_name + "_0001.nii.gz"))
153
+ shutil.copy(t2, join(target_imagesVal, patient_name + "_0002.nii.gz"))
154
+ shutil.copy(flair, join(target_imagesVal, patient_name + "_0003.nii.gz"))
155
+
156
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Testing_FIsensee"
157
+
158
+ for p in subdirs(downloaded_data_dir, join=False):
159
+ patdir = join(downloaded_data_dir, p)
160
+ patient_name = p
161
+ t1 = join(patdir, p + "_t1.nii.gz")
162
+ t1c = join(patdir, p + "_t1ce.nii.gz")
163
+ t2 = join(patdir, p + "_t2.nii.gz")
164
+ flair = join(patdir, p + "_flair.nii.gz")
165
+
166
+ assert all([
167
+ isfile(t1),
168
+ isfile(t1c),
169
+ isfile(t2),
170
+ isfile(flair),
171
+ ]), "%s" % patient_name
172
+
173
+ shutil.copy(t1, join(target_imagesTs, patient_name + "_0000.nii.gz"))
174
+ shutil.copy(t1c, join(target_imagesTs, patient_name + "_0001.nii.gz"))
175
+ shutil.copy(t2, join(target_imagesTs, patient_name + "_0002.nii.gz"))
176
+ shutil.copy(flair, join(target_imagesTs, patient_name + "_0003.nii.gz"))
nnunet/dataset_conversion/Task035_ISBI_MSLesionSegmentationChallenge.py ADDED
@@ -0,0 +1,162 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import shutil
16
+ from collections import OrderedDict
17
+ import numpy as np
18
+ import SimpleITK as sitk
19
+ import multiprocessing
20
+ from batchgenerators.utilities.file_and_folder_operations import *
21
+
22
+
23
+ def convert_to_nii_gz(filename):
24
+ f = sitk.ReadImage(filename)
25
+ sitk.WriteImage(f, os.path.splitext(filename)[0] + ".nii.gz")
26
+ os.remove(filename)
27
+
28
+
29
+ def convert_for_submission(source_dir, target_dir):
30
+ files = subfiles(source_dir, suffix=".nii.gz", join=False)
31
+ maybe_mkdir_p(target_dir)
32
+ for f in files:
33
+ splitted = f.split("__")
34
+ case_id = int(splitted[1])
35
+ timestep = int(splitted[2][:-7])
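+ # prediction names follow the case__<patient>__<timestep>.nii.gz scheme created by rename_files below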
36
+ t = join(target_dir, "test%02d_%02d_nnUNet.nii" % (case_id, timestep))
37
+ img = sitk.ReadImage(join(source_dir, f))
38
+ sitk.WriteImage(img, t)
39
+
40
+
41
+ if __name__ == "__main__":
42
+ # convert to nifti.gz
43
+ dirs = ['/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTr',
44
+ '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTs',
45
+ '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/labelsTr']
46
+
47
+ p = multiprocessing.Pool(3)
48
+
49
+ for d in dirs:
50
+ nii_files = subfiles(d, suffix='.nii')
51
+ p.map(convert_to_nii_gz, nii_files)
52
+
53
+ p.close()
54
+ p.join()
55
+
56
+
57
+ def rename_files(folder):
58
+ all_files = subfiles(folder, join=False)
59
+ # there are max 14 patients per folder, starting with 1
60
+ for patientid in range(1, 15):
61
+ # there are certainly fewer than 10 time steps per patient, starting with 1
62
+ for t in range(1, 10):
63
+ patient_files = [i for i in all_files if i.find("%02.0d_%02.0d_" % (patientid, t)) != -1]
64
+ if not len(patient_files) == 4:
65
+ continue
66
+
67
+ flair_file = [i for i in patient_files if i.endswith("_flair_pp.nii.gz")][0]
68
+ mprage_file = [i for i in patient_files if i.endswith("_mprage_pp.nii.gz")][0]
69
+ pd_file = [i for i in patient_files if i.endswith("_pd_pp.nii.gz")][0]
70
+ t2_file = [i for i in patient_files if i.endswith("_t2_pp.nii.gz")][0]
71
+
72
+ os.rename(join(folder, flair_file), join(folder, "case__%02.0d__%02.0d_0000.nii.gz" % (patientid, t)))
73
+ os.rename(join(folder, mprage_file), join(folder, "case__%02.0d__%02.0d_0001.nii.gz" % (patientid, t)))
74
+ os.rename(join(folder, pd_file), join(folder, "case__%02.0d__%02.0d_0002.nii.gz" % (patientid, t)))
75
+ os.rename(join(folder, t2_file), join(folder, "case__%02.0d__%02.0d_0003.nii.gz" % (patientid, t)))
76
+
77
+
78
+ for d in dirs[:-1]:
79
+ rename_files(d)
80
+
81
+
82
+ # now we have to deal with the training masks, we do it the quick and dirty way here by just creating copies of the
83
+ # training data
84
+
85
+ train_folder = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTr'
86
+
87
+ for patientid in range(1, 6):
88
+ for t in range(1, 6):
89
+ fnames_original = subfiles(train_folder, prefix="case__%02.0d__%02.0d" % (patientid, t), suffix=".nii.gz", sort=True)
90
+ for f in fnames_original:
91
+ for mask in [1, 2]:
92
+ fname_target = f[:-12] + "__mask%d" % mask + f[-12:]
93
+ shutil.copy(f, fname_target)
94
+ os.remove(f)
95
+
96
+
97
+ labels_folder = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/labelsTr'
98
+
99
+ for patientid in range(1, 6):
100
+ for t in range(1, 6):
101
+ for mask in [1, 2]:
102
+ f = join(labels_folder, "training%02d_%02d_mask%d.nii.gz" % (patientid, t, mask))
103
+ if isfile(f):
104
+ os.rename(f, join(labels_folder, "case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask)))
105
+
106
+
107
+
108
+ tr_files = []
109
+ for patientid in range(1, 6):
110
+ for t in range(1, 6):
111
+ for mask in [1, 2]:
112
+ if isfile(join(labels_folder, "case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask))):
113
+ tr_files.append("case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask))
114
+
115
+
116
+ ts_files = []
117
+ for patientid in range(1, 20):
118
+ for t in range(1, 20):
119
+ if isfile(join("/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTs",
120
+ "case__%02.0d__%02.0d_0000.nii.gz" % (patientid, t))):
121
+ ts_files.append("case__%02.0d__%02.0d.nii.gz" % (patientid, t))
122
+
123
+
124
+ out_base = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/'
125
+
126
+ json_dict = OrderedDict()
127
+ json_dict['name'] = "ISBI_Lesion_Segmentation_Challenge_2015"
128
+ json_dict['description'] = "nothing"
129
+ json_dict['tensorImageSize'] = "4D"
130
+ json_dict['reference'] = "see challenge website"
131
+ json_dict['licence'] = "see challenge website"
132
+ json_dict['release'] = "0.0"
133
+ json_dict['modality'] = {
134
+ "0": "flair",
135
+ "1": "mprage",
136
+ "2": "pd",
137
+ "3": "t2"
138
+ }
139
+ json_dict['labels'] = {
140
+ "0": "background",
141
+ "1": "lesion"
142
+ }
143
+ json_dict['numTraining'] = len(subfiles(labels_folder))
144
+ json_dict['numTest'] = len(subfiles('/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTs')) // 4
145
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i[:-7], "label": "./labelsTr/%s.nii.gz" % i[:-7]} for i in
146
+ tr_files]
147
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i[:-7] for i in ts_files]
148
+
149
+ save_json(json_dict, join(out_base, "dataset.json"))
150
+
151
+ case_identifiers = np.unique([i[:-12] for i in subfiles("/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task035_ISBILesionSegmentation/imagesTr", suffix='.nii.gz', join=False)])
152
+
153
+ splits = []
154
+ for f in range(5):
155
+ cases = [i for i in range(1, 6) if i != f+1]  # note: unused, the split is built from case_identifiers below
156
+ splits.append(OrderedDict())
157
+ splits[-1]['val'] = np.array([i for i in case_identifiers if i.startswith("case__%02d__" % (f + 1))])
158
+ remaining = [i for i in case_identifiers if i not in splits[-1]['val']]
159
+ splits[-1]['train'] = np.array(remaining)
160
+
161
+ maybe_mkdir_p("/media/fabian/nnunet/Task035_ISBILesionSegmentation")
162
+ save_pickle(splits, join("/media/fabian/nnunet/Task035_ISBILesionSegmentation", "splits_final.pkl"))
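+ # For reference (a note, not part of the original script): splits_final.pkl is a list with one
+ # OrderedDict per fold, each holding 'train' and 'val' arrays of case identifiers. When this file
+ # is present in the preprocessed task folder, nnU-Net uses it instead of generating its own random
+ # split; here each fold holds out all timepoints/masks of one patient (patient-wise stratification).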
nnunet/dataset_conversion/Task037_038_Chaos_Challenge.py ADDED
@@ -0,0 +1,460 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from PIL import Image
17
+ import shutil
18
+ from collections import OrderedDict
19
+
20
+ import dicom2nifti
21
+ import numpy as np
22
+ from batchgenerators.utilities.data_splitting import get_split_deterministic
23
+ from batchgenerators.utilities.file_and_folder_operations import *
25
+ import SimpleITK as sitk
26
+ from nnunet.paths import preprocessing_output_dir, nnUNet_raw_data
27
+ from nnunet.utilities.sitk_stuff import copy_geometry
28
+ from nnunet.inference.ensemble_predictions import merge
29
+
30
+
31
+ def load_png_stack(folder):
32
+ pngs = subfiles(folder, suffix="png")
33
+ pngs.sort()
34
+ loaded = []
35
+ for p in pngs:
36
+ loaded.append(np.array(Image.open(p)))
37
+ loaded = np.stack(loaded, 0)[::-1]
38
+ return loaded
39
+
40
+
41
+ def convert_CT_seg(loaded_png):
42
+ return loaded_png.astype(np.uint16)
43
+
44
+
45
+ def convert_MR_seg(loaded_png):
46
+ result = np.zeros(loaded_png.shape)
47
+ result[(loaded_png > 55) & (loaded_png <= 70)] = 1 # liver
48
+ result[(loaded_png > 110) & (loaded_png <= 135)] = 2 # right kidney
49
+ result[(loaded_png > 175) & (loaded_png <= 200)] = 3 # left kidney
50
+ result[(loaded_png > 240) & (loaded_png <= 255)] = 4 # spleen
51
+ return result
52
+
53
+
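+ # Quick sanity check for the intensity-window decoding above (a sketch, not part of the original
+ # script): the CHAOS MR ground truth encodes organs as gray values, convert_MR_seg maps them to
+ # class ids, and the convert_seg_to_intensity_task* functions below invert this for submission:
+ # assert convert_MR_seg(np.array([0, 63, 126, 189, 252])).tolist() == [0.0, 1.0, 2.0, 3.0, 4.0]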
54
+ def convert_seg_to_intensity_task5(seg):
55
+ seg_new = np.zeros(seg.shape, dtype=np.uint8)
56
+ seg_new[seg == 1] = 63
57
+ seg_new[seg == 2] = 126
58
+ seg_new[seg == 3] = 189
59
+ seg_new[seg == 4] = 252
60
+ return seg_new
61
+
62
+
63
+ def convert_seg_to_intensity_task3(seg):
64
+ seg_new = np.zeros(seg.shape, dtype=np.uint8)
65
+ seg_new[seg == 1] = 63
66
+ return seg_new
67
+
68
+
69
+ def write_pngs_from_nifti(nifti, output_folder, converter=convert_seg_to_intensity_task3):
70
+ npy = sitk.GetArrayFromImage(sitk.ReadImage(nifti))
71
+ seg_new = converter(npy)
72
+ for z in range(len(npy)):
73
+ Image.fromarray(seg_new[z]).save(join(output_folder, "img%03.0d.png" % z))
74
+
75
+
76
+ def convert_variant2_predicted_test_to_submission_format(folder_with_predictions,
77
+ output_folder="/home/fabian/drives/datasets/results/nnUNet/test_sets/Task038_CHAOS_Task_3_5_Variant2/ready_to_submit",
78
+ postprocessing_file="/home/fabian/drives/datasets/results/nnUNet/ensembles/Task038_CHAOS_Task_3_5_Variant2/ensemble_2d__nnUNetTrainerV2__nnUNetPlansv2.1--3d_fullres__nnUNetTrainerV2__nnUNetPlansv2.1/postprocessing.json"):
79
+ """
80
+ output_folder is where the extracted template is
81
+ :param folder_with_predictions:
82
+ :param output_folder:
83
+ :return:
84
+ """
85
+ postprocessing_file = "/media/fabian/Results/nnUNet/3d_fullres/Task039_CHAOS_Task_3_5_Variant2_highres/" \
86
+ "nnUNetTrainerV2__nnUNetPlansfixed/postprocessing.json"
87
+
88
+ # variant 2 treats in and out phase as two training examples, so we need to ensemble these two again
89
+ final_predictions_folder = join(output_folder, "final")
90
+ maybe_mkdir_p(final_predictions_folder)
91
+ t1_patient_names = [i.split("_")[-1][:-7] for i in subfiles(folder_with_predictions, prefix="T1", suffix=".nii.gz", join=False)]
92
+ folder_for_ensembing0 = join(output_folder, "ens0")
93
+ folder_for_ensembing1 = join(output_folder, "ens1")
94
+ maybe_mkdir_p(folder_for_ensembing0)
95
+ maybe_mkdir_p(folder_for_ensembing1)
96
+ # now copy all t1 out phases in ens0 and all in phases in ens1. Name them the same.
97
+ for t1 in t1_patient_names:
98
+ shutil.copy(join(folder_with_predictions, "T1_in_%s.npz" % t1), join(folder_for_ensembing1, "T1_%s.npz" % t1))
99
+ shutil.copy(join(folder_with_predictions, "T1_in_%s.pkl" % t1), join(folder_for_ensembing1, "T1_%s.pkl" % t1))
100
+ shutil.copy(join(folder_with_predictions, "T1_out_%s.npz" % t1), join(folder_for_ensembing0, "T1_%s.npz" % t1))
101
+ shutil.copy(join(folder_with_predictions, "T1_out_%s.pkl" % t1), join(folder_for_ensembing0, "T1_%s.pkl" % t1))
102
+ shutil.copy(join(folder_with_predictions, "plans.pkl"), join(folder_for_ensembing0, "plans.pkl"))
103
+ shutil.copy(join(folder_with_predictions, "plans.pkl"), join(folder_for_ensembing1, "plans.pkl"))
104
+
105
+ # there is a problem with T1_35 that I need to correct manually (different crop size, will not negatively impact results)
106
+ #ens0_softmax = np.load(join(folder_for_ensembing0, "T1_35.npz"))['softmax']
107
+ ens1_softmax = np.load(join(folder_for_ensembing1, "T1_35.npz"))['softmax']
108
+ #ens0_props = load_pickle(join(folder_for_ensembing0, "T1_35.pkl"))
109
+ #ens1_props = load_pickle(join(folder_for_ensembing1, "T1_35.pkl"))
110
+ ens1_softmax = ens1_softmax[:, :, :-1, :]
111
+ np.savez_compressed(join(folder_for_ensembing1, "T1_35.npz"), softmax=ens1_softmax)
112
+ shutil.copy(join(folder_for_ensembing0, "T1_35.pkl"), join(folder_for_ensembing1, "T1_35.pkl"))
113
+
114
+ # now call my ensemble function
115
+ merge((folder_for_ensembing0, folder_for_ensembing1), final_predictions_folder, 8, True,
116
+ postprocessing_file=postprocessing_file)
117
+ # copy t2 files to final_predictions_folder as well
118
+ t2_files = subfiles(folder_with_predictions, prefix="T2", suffix=".nii.gz", join=False)
119
+ for t2 in t2_files:
120
+ shutil.copy(join(folder_with_predictions, t2), join(final_predictions_folder, t2))
121
+
122
+ # apply postprocessing
123
+ from nnunet.postprocessing.connected_components import apply_postprocessing_to_folder, load_postprocessing
124
+ postprocessed_folder = join(output_folder, "final_postprocessed")
125
+ for_which_classes, min_valid_obj_size = load_postprocessing(postprocessing_file)
126
+ apply_postprocessing_to_folder(final_predictions_folder, postprocessed_folder,
127
+ for_which_classes, min_valid_obj_size, 8)
128
+
129
+ # now export the niftis in the weird png format
130
+ # task 3
131
+ output_dir = join(output_folder, "CHAOS_submission_template_new", "Task3", "MR")
132
+ for t1 in t1_patient_names:
133
+ output_folder_here = join(output_dir, t1, "T1DUAL", "Results")
134
+ nifti_file = join(postprocessed_folder, "T1_%s.nii.gz" % t1)
135
+ write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task3)
136
+ for t2 in t2_files:
137
+ patname = t2.split("_")[-1][:-7]
138
+ output_folder_here = join(output_dir, patname, "T2SPIR", "Results")
139
+ nifti_file = join(postprocessed_folder, "T2_%s.nii.gz" % patname)
140
+ write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task3)
141
+
142
+ # task 5
143
+ output_dir = join(output_folder, "CHAOS_submission_template_new", "Task5", "MR")
144
+ for t1 in t1_patient_names:
145
+ output_folder_here = join(output_dir, t1, "T1DUAL", "Results")
146
+ nifti_file = join(postprocessed_folder, "T1_%s.nii.gz" % t1)
147
+ write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task5)
148
+ for t2 in t2_files:
149
+ patname = t2.split("_")[-1][:-7]
150
+ output_folder_here = join(output_dir, patname, "T2SPIR", "Results")
151
+ nifti_file = join(postprocessed_folder, "T2_%s.nii.gz" % patname)
152
+ write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task5)
153
+
154
+
155
+
156
+ if __name__ == "__main__":
157
+ """
158
+ This script only prepares data to participate in Task 3 and Task 5. I don't like the CT task because
159
+ 1) there are
160
+ no abdominal organs in the ground truth. In the case of CT we are supposed to train only liver while on MRI we are
161
+ supposed to train all organs. This would require manual modification of nnU-net to deal with this dataset. This is
162
+ not what nnU-net is about.
163
+ 2) CT Liver or multiorgan segmentation is too easy to get external data for. Therefore the challenge comes down
164
+ to who gets the best external data, not who has the best algorithm. Not super interesting.
165
+
166
+ Task 3 is a subtask of Task 5 so we need to prepare the data only once.
167
+ Difficulty: We need to process both T1 and T2, but T1 has 2 'modalities' (phases). nnU-Net cannot handle a varying
168
+ number of input channels. We need to be creative.
169
+ We deal with this by preparing 2 Variants:
170
+ 1) pretend we have 2 modalities for T2 as well by simply stacking a copy of the data
171
+ 2) treat all MRI sequences independently, so we now have 3*20 training data instead of 2*20. In inference we then
172
+ ensemble the results for the two t1 modalities.
173
+
174
+ Careful: We need to split manually here to ensure we stratify by patient
175
+ """
176
+
177
+ root = "/media/fabian/My Book/datasets/CHAOS_challenge/Train_Sets"
178
+ root_test = "/media/fabian/My Book/datasets/CHAOS_challenge/Test_Sets"
179
+ out_base = nnUNet_raw_data
180
+ # CT
181
+ # we ignore CT because
182
+
183
+ ##############################################################
184
+ # Variant 1
185
+ ##############################################################
186
+ patient_ids = []
187
+ patient_ids_test = []
188
+
189
+ output_folder = join(out_base, "Task037_CHAOS_Task_3_5_Variant1")
190
+ output_images = join(output_folder, "imagesTr")
191
+ output_labels = join(output_folder, "labelsTr")
192
+ output_imagesTs = join(output_folder, "imagesTs")
193
+ maybe_mkdir_p(output_images)
194
+ maybe_mkdir_p(output_labels)
195
+ maybe_mkdir_p(output_imagesTs)
196
+
197
+
198
+ # Process T1 train
199
+ d = join(root, "MR")
200
+ patients = subdirs(d, join=False)
201
+ for p in patients:
202
+ patient_name = "T1_" + p
203
+ gt_dir = join(d, p, "T1DUAL", "Ground")
204
+ seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])
205
+
206
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
207
+ img_outfile = join(output_images, patient_name + "_0000.nii.gz")
208
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
209
+
210
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
211
+ img_outfile = join(output_images, patient_name + "_0001.nii.gz")
212
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
213
+
214
+ img_sitk = sitk.ReadImage(img_outfile)
215
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
216
+ seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
217
+ seg_itk = copy_geometry(seg_itk, img_sitk)
218
+ sitk.WriteImage(seg_itk, join(output_labels, patient_name + ".nii.gz"))
219
+ patient_ids.append(patient_name)
220
+
221
+ # Process T1 test
222
+ d = join(root_test, "MR")
223
+ patients = subdirs(d, join=False)
224
+ for p in patients:
225
+ patient_name = "T1_" + p
226
+
227
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
228
+ img_outfile = join(output_imagesTs, patient_name + "_0000.nii.gz")
229
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
230
+
231
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
232
+ img_outfile = join(output_imagesTs, patient_name + "_0001.nii.gz")
233
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
234
+
235
+ img_sitk = sitk.ReadImage(img_outfile)
236
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
237
+ patient_ids_test.append(patient_name)
238
+
239
+ # Process T2 train
240
+ d = join(root, "MR")
241
+ patients = subdirs(d, join=False)
242
+ for p in patients:
243
+ patient_name = "T2_" + p
244
+
245
+ gt_dir = join(d, p, "T2SPIR", "Ground")
246
+ seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])
247
+
248
+ img_dir = join(d, p, "T2SPIR", "DICOM_anon")
249
+ img_outfile = join(output_images, patient_name + "_0000.nii.gz")
250
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
251
+ shutil.copy(join(output_images, patient_name + "_0000.nii.gz"), join(output_images, patient_name + "_0001.nii.gz"))
252
+
253
+ img_sitk = sitk.ReadImage(img_outfile)
254
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
255
+ seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
256
+ seg_itk = copy_geometry(seg_itk, img_sitk)
257
+ sitk.WriteImage(seg_itk, join(output_labels, patient_name + ".nii.gz"))
258
+ patient_ids.append(patient_name)
259
+
260
+ # Process T2 test
261
+ d = join(root_test, "MR")
262
+ patients = subdirs(d, join=False)
263
+ for p in patients:
264
+ patient_name = "T2_" + p
265
+
266
+ gt_dir = join(d, p, "T2SPIR", "Ground")
267
+
268
+ img_dir = join(d, p, "T2SPIR", "DICOM_anon")
269
+ img_outfile = join(output_imagesTs, patient_name + "_0000.nii.gz")
270
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
271
+ shutil.copy(join(output_imagesTs, patient_name + "_0000.nii.gz"), join(output_imagesTs, patient_name + "_0001.nii.gz"))
272
+
273
+ img_sitk = sitk.ReadImage(img_outfile)
274
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
275
+ patient_ids_test.append(patient_name)
276
+
277
+ json_dict = OrderedDict()
278
+ json_dict['name'] = "Chaos Challenge Task3/5 Variant 1"
279
+ json_dict['description'] = "nothing"
280
+ json_dict['tensorImageSize'] = "4D"
281
+ json_dict['reference'] = "https://chaos.grand-challenge.org/Data/"
282
+ json_dict['licence'] = "see https://chaos.grand-challenge.org/Data/"
283
+ json_dict['release'] = "0.0"
284
+ json_dict['modality'] = {
285
+ "0": "MRI",
286
+ "1": "MRI",
287
+ }
288
+ json_dict['labels'] = {
289
+ "0": "background",
290
+ "1": "liver",
291
+ "2": "right kidney",
292
+ "3": "left kidney",
293
+ "4": "spleen",
294
+ }
295
+ json_dict['numTraining'] = len(patient_ids)
296
+ json_dict['numTest'] = 0
297
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
298
+ patient_ids]
299
+ json_dict['test'] = []
300
+
301
+ save_json(json_dict, join(output_folder, "dataset.json"))
302
+
303
+ ##############################################################
304
+ # Variant 2
305
+ ##############################################################
306
+
307
+ patient_ids = []
308
+ patient_ids_test = []
309
+
310
+ output_folder = join(out_base, "Task038_CHAOS_Task_3_5_Variant2")
311
+ output_images = join(output_folder, "imagesTr")
312
+ output_imagesTs = join(output_folder, "imagesTs")
313
+ output_labels = join(output_folder, "labelsTr")
314
+ maybe_mkdir_p(output_images)
315
+ maybe_mkdir_p(output_imagesTs)
316
+ maybe_mkdir_p(output_labels)
317
+
318
+ # Process T1 train
319
+ d = join(root, "MR")
320
+ patients = subdirs(d, join=False)
321
+ for p in patients:
322
+ patient_name_in = "T1_in_" + p
323
+ patient_name_out = "T1_out_" + p
324
+ gt_dir = join(d, p, "T1DUAL", "Ground")
325
+ seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])
326
+
327
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
328
+ img_outfile = join(output_images, patient_name_in + "_0000.nii.gz")
329
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
330
+
331
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
332
+ img_outfile = join(output_images, patient_name_out + "_0000.nii.gz")
333
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
334
+
335
+ img_sitk = sitk.ReadImage(img_outfile)
336
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
337
+ seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
338
+ seg_itk = copy_geometry(seg_itk, img_sitk)
339
+ sitk.WriteImage(seg_itk, join(output_labels, patient_name_in + ".nii.gz"))
340
+ sitk.WriteImage(seg_itk, join(output_labels, patient_name_out + ".nii.gz"))
341
+ patient_ids.append(patient_name_out)
342
+ patient_ids.append(patient_name_in)
343
+
344
+ # Process T1 test
345
+ d = join(root_test, "MR")
346
+ patients = subdirs(d, join=False)
347
+ for p in patients:
348
+ patient_name_in = "T1_in_" + p
349
+ patient_name_out = "T1_out_" + p
350
+ gt_dir = join(d, p, "T1DUAL", "Ground")
351
+
352
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
353
+ img_outfile = join(output_imagesTs, patient_name_in + "_0000.nii.gz")
354
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
355
+
356
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
357
+ img_outfile = join(output_imagesTs, patient_name_out + "_0000.nii.gz")
358
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
359
+
360
+ img_sitk = sitk.ReadImage(img_outfile)
361
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
362
+ patient_ids_test.append(patient_name_out)
363
+ patient_ids_test.append(patient_name_in)
364
+
365
+ # Process T2 train
366
+ d = join(root, "MR")
367
+ patients = subdirs(d, join=False)
368
+ for p in patients:
369
+ patient_name = "T2_" + p
370
+
371
+ gt_dir = join(d, p, "T2SPIR", "Ground")
372
+ seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])
373
+
374
+ img_dir = join(d, p, "T2SPIR", "DICOM_anon")
375
+ img_outfile = join(output_images, patient_name + "_0000.nii.gz")
376
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
377
+
378
+ img_sitk = sitk.ReadImage(img_outfile)
379
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
380
+ seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
381
+ seg_itk = copy_geometry(seg_itk, img_sitk)
382
+ sitk.WriteImage(seg_itk, join(output_labels, patient_name + ".nii.gz"))
383
+ patient_ids.append(patient_name)
384
+
385
+ # Process T2 test
386
+ d = join(root_test, "MR")
387
+ patients = subdirs(d, join=False)
388
+ for p in patients:
389
+ patient_name = "T2_" + p
390
+
391
+ gt_dir = join(d, p, "T2SPIR", "Ground")
392
+
393
+ img_dir = join(d, p, "T2SPIR", "DICOM_anon")
394
+ img_outfile = join(output_imagesTs, patient_name + "_0000.nii.gz")
395
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
396
+
397
+ img_sitk = sitk.ReadImage(img_outfile)
398
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
399
+ patient_ids_test.append(patient_name)
400
+
401
+ json_dict = OrderedDict()
402
+ json_dict['name'] = "Chaos Challenge Task3/5 Variant 2"
403
+ json_dict['description'] = "nothing"
404
+ json_dict['tensorImageSize'] = "4D"
405
+ json_dict['reference'] = "https://chaos.grand-challenge.org/Data/"
406
+ json_dict['licence'] = "see https://chaos.grand-challenge.org/Data/"
407
+ json_dict['release'] = "0.0"
408
+ json_dict['modality'] = {
409
+ "0": "MRI",
410
+ }
411
+ json_dict['labels'] = {
412
+ "0": "background",
413
+ "1": "liver",
414
+ "2": "right kidney",
415
+ "3": "left kidney",
416
+ "4": "spleen",
417
+ }
418
+ json_dict['numTraining'] = len(patient_ids)
419
+ json_dict['numTest'] = 0
420
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
421
+ patient_ids]
422
+ json_dict['test'] = []
423
+
424
+ save_json(json_dict, join(output_folder, "dataset.json"))
425
+
426
+ #################################################
427
+ # custom split
428
+ #################################################
429
+ patients = subdirs(join(root, "MR"), join=False)
430
+ task_name_variant1 = "Task037_CHAOS_Task_3_5_Variant1"
431
+ task_name_variant2 = "Task038_CHAOS_Task_3_5_Variant2"
432
+
433
+ output_preprocessed_v1 = join(preprocessing_output_dir, task_name_variant1)
434
+ maybe_mkdir_p(output_preprocessed_v1)
435
+
436
+ output_preprocessed_v2 = join(preprocessing_output_dir, task_name_variant2)
437
+ maybe_mkdir_p(output_preprocessed_v2)
438
+
439
+ splits = []
440
+ for fold in range(5):
441
+ tr, val = get_split_deterministic(patients, fold, 5, 12345)
442
+ train = ["T2_" + i for i in tr] + ["T1_" + i for i in tr]
443
+ validation = ["T2_" + i for i in val] + ["T1_" + i for i in val]
444
+ splits.append({
445
+ 'train': train,
446
+ 'val': validation
447
+ })
448
+ save_pickle(splits, join(output_preprocessed_v1, "splits_final.pkl"))
449
+
450
+ splits = []
451
+ for fold in range(5):
452
+ tr, val = get_split_deterministic(patients, fold, 5, 12345)
453
+ train = ["T2_" + i for i in tr] + ["T1_in_" + i for i in tr] + ["T1_out_" + i for i in tr]
454
+ validation = ["T2_" + i for i in val] + ["T1_in_" + i for i in val] + ["T1_out_" + i for i in val]
455
+ splits.append({
456
+ 'train': train,
457
+ 'val': validation
458
+ })
459
+ save_pickle(splits, join(output_preprocessed_v2, "splits_final.pkl"))
460
+
nnunet/dataset_conversion/Task040_KiTS.py ADDED
@@ -0,0 +1,240 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from copy import deepcopy
17
+
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+ import SimpleITK as sitk
21
+ from multiprocessing import Pool
22
+ from medpy.metric import dc
23
+ import numpy as np
24
+ from nnunet.paths import network_training_output_dir
25
+ from scipy.ndimage import label
26
+
27
+
28
+ def compute_dice_scores(ref: str, pred: str):
29
+ ref = sitk.GetArrayFromImage(sitk.ReadImage(ref))
30
+ pred = sitk.GetArrayFromImage(sitk.ReadImage(pred))
31
+ kidney_mask_ref = ref > 0
32
+ kidney_mask_pred = pred > 0
33
+ if np.sum(kidney_mask_pred) == 0 and kidney_mask_ref.sum() == 0:
34
+ kidney_dice = np.nan
35
+ else:
36
+ kidney_dice = dc(kidney_mask_pred, kidney_mask_ref)
37
+
38
+ tumor_mask_ref = ref == 2
39
+ tumor_mask_pred = pred == 2
40
+ if np.sum(tumor_mask_ref) == 0 and tumor_mask_pred.sum() == 0:
41
+ tumor_dice = np.nan
42
+ else:
43
+ tumor_dice = dc(tumor_mask_ref, tumor_mask_pred)
44
+
45
+ geometric_mean = np.mean((kidney_dice, tumor_dice))  # note: despite the name, np.mean is the arithmetic mean
46
+ return kidney_dice, tumor_dice, geometric_mean
47
+
48
+
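+ # For reference (a sketch, not part of the original file): medpy.metric.dc computes the
+ # Soerensen-Dice coefficient DSC = 2 * |A n B| / (|A| + |B|). Example with two half-overlapping
+ # 8-voxel masks:
+ # a = np.zeros((4, 4), bool); a[:2] = True
+ # b = np.zeros((4, 4), bool); b[1:3] = True
+ # dc(a, b) == 2 * 4 / (8 + 8) == 0.5
+ # The np.nan above marks structures that are absent from both reference and prediction, so no
+ # meaningless Dice of 0 is recorded for them.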
49
+ def evaluate_folder(folder_gt: str, folder_pred: str):
50
+ p = Pool(8)
51
+ niftis = subfiles(folder_gt, suffix=".nii.gz", join=False)
52
+ images_gt = [join(folder_gt, i) for i in niftis]
53
+ images_pred = [join(folder_pred, i) for i in niftis]
54
+ results = p.starmap(compute_dice_scores, zip(images_gt, images_pred))
55
+ p.close()
56
+ p.join()
57
+
58
+ with open(join(folder_pred, "results.csv"), 'w') as f:
59
+ for i, ni in enumerate(niftis):
60
+ f.write("%s,%0.4f,%0.4f,%0.4f\n" % (ni, *results[i]))
61
+
62
+
63
+ def remove_all_but_the_two_largest_conn_comp(img_itk_file: str, file_out: str):
64
+ """
65
+ This was not used. I was just curious because others used this. Turns out this is not necessary for my networks
66
+ """
67
+ img_itk = sitk.ReadImage(img_itk_file)
68
+ img_npy = sitk.GetArrayFromImage(img_itk)
69
+
70
+ labelmap, num_labels = label((img_npy > 0).astype(int))
71
+
72
+ if num_labels > 2:
73
+ label_sizes = []
74
+ for i in range(1, num_labels + 1):
75
+ label_sizes.append(np.sum(labelmap == i))
76
+ argsrt = np.argsort(label_sizes)[::-1] # two largest are now argsrt[0] and argsrt[1]
77
+ keep_mask = (labelmap == argsrt[0] + 1) | (labelmap == argsrt[1] + 1)
78
+ img_npy[~keep_mask] = 0
79
+ new = sitk.GetImageFromArray(img_npy)
80
+ new.CopyInformation(img_itk)
81
+ sitk.WriteImage(new, file_out)
82
+ print(os.path.basename(img_itk_file), num_labels, label_sizes)
83
+ else:
84
+ shutil.copy(img_itk_file, file_out)
85
+
86
+
87
+ def manual_postprocess(folder_in,
88
+ folder_out):
89
+ """
90
+ This was not used. I was just curious because others used this. Turns out this is not necessary for my networks
91
+ """
92
+ maybe_mkdir_p(folder_out)
93
+ infiles = subfiles(folder_in, suffix=".nii.gz", join=False)
94
+
95
+ outfiles = [join(folder_out, i) for i in infiles]
96
+ infiles = [join(folder_in, i) for i in infiles]
97
+
98
+ p = Pool(8)
99
+ _ = p.starmap_async(remove_all_but_the_two_largest_conn_comp, zip(infiles, outfiles))
100
+ _ = _.get()
101
+ p.close()
102
+ p.join()
103
+
104
+
105
+
106
+
107
+ def copy_npz_fom_valsets():
108
+ '''
109
+ this is preparation for ensembling
110
+ :return:
111
+ '''
112
+ base = join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean")
113
+ folders = ['nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
114
+ 'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans',
115
+ 'nnUNetTrainerNewCandidate23__nnUNetPlans']
116
+ for f in folders:
117
+ out = join(base, f, 'crossval_npz')
118
+ maybe_mkdir_p(out)
119
+ shutil.copy(join(base, f, 'plans.pkl'), out)
120
+ for fold in range(5):
121
+ cur = join(base, f, 'fold_%d' % fold, 'validation_raw')
122
+ npz_files = subfiles(cur, suffix='.npz', join=False)
123
+ pkl_files = [i[:-3] + 'pkl' for i in npz_files]
124
+ assert all([isfile(join(cur, i)) for i in pkl_files])
125
+ for n in npz_files:
126
+ corresponding_pkl = n[:-3] + 'pkl'
127
+ shutil.copy(join(cur, n), out)
128
+ shutil.copy(join(cur, corresponding_pkl), out)
129
+
130
+
131
+ def ensemble(experiments=('nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
132
+ 'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans'), out_dir="/media/fabian/Results/nnUNet/3d_lowres/Task048_KiTS_clean/ensemble_preactres_and_res"):
133
+ from nnunet.inference.ensemble_predictions import merge
134
+ folders = [join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean", i, 'crossval_npz') for i in experiments]
135
+ merge(folders, out_dir, 8)
136
+
137
+
138
+ def prepare_submission(fld= "/home/fabian/drives/datasets/results/nnUNet/test_sets/Task048_KiTS_clean/predicted_ens_3d_fullres_3d_cascade_fullres_postprocessed", # '/home/fabian/datasets_fabian/predicted_KiTS_nnUNetTrainerNewCandidate23_FabiansResNet',
139
+ out='/home/fabian/drives/datasets/results/nnUNet/test_sets/Task048_KiTS_clean/submission'):
140
+ nii = subfiles(fld, join=False, suffix='.nii.gz')
141
+ maybe_mkdir_p(out)
142
+ for n in nii:
143
+ outfname = n.replace('case', 'prediction')
144
+ shutil.copy(join(fld, n), join(out, outfname))
145
+
146
+
147
+ def pretent_to_be_nnUNetTrainer(base, folds=(0, 1, 2, 3, 4)):
148
+ """
149
+ changes best checkpoint pickle nnunettrainer class name to nnUNetTrainer
150
+ :param experiments:
151
+ :return:
152
+ """
153
+ for fold in folds:
154
+ cur = join(base, "fold_%d" % fold)
155
+ pkl_file = join(cur, 'model_best.model.pkl')
156
+ a = load_pickle(pkl_file)
157
+ a['name_old'] = deepcopy(a['name'])
158
+ a['name'] = 'nnUNetTrainer'
159
+ save_pickle(a, pkl_file)
160
+
161
+
162
+ def reset_trainerName(base, folds=(0, 1, 2, 3, 4)):
163
+ for fold in folds:
164
+ cur = join(base, "fold_%d" % fold)
165
+ pkl_file = join(cur, 'model_best.model.pkl')
166
+ a = load_pickle(pkl_file)
167
+ a['name'] = a['name_old']
168
+ del a['name_old']
169
+ save_pickle(a, pkl_file)
170
+
171
+
172
+ def nnUNetTrainer_these(experiments=('nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
173
+ 'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans',
174
+ 'nnUNetTrainerNewCandidate23__nnUNetPlans')):
175
+ """
176
+ changes best checkpoint pickle nnunettrainer class name to nnUNetTrainer
177
+ :param experiments:
178
+ :return:
179
+ """
180
+ base = join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean")
181
+ for exp in experiments:
182
+ cur = join(base, exp)
183
+ pretent_to_be_nnUNetTrainer(cur)
184
+
185
+
186
+ def reset_trainerName_these(experiments=('nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
187
+ 'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans',
188
+ 'nnUNetTrainerNewCandidate23__nnUNetPlans')):
189
+ """
190
+ restores the original trainer class name in the best checkpoint pickles (undoes pretent_to_be_nnUNetTrainer)
191
+ :param experiments:
192
+ :return:
193
+ """
194
+ base = join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean")
195
+ for exp in experiments:
196
+ cur = join(base, exp)
197
+ reset_trainerName(cur)
198
+
199
+
200
+ if __name__ == "__main__":
201
+ base = "/media/fabian/My Book/datasets/KiTS2019_Challenge/kits19/data"
202
+ out = "/media/fabian/My Book/MedicalDecathlon/nnUNet_raw_splitted/Task040_KiTS"
203
+ cases = subdirs(base, join=False)
204
+
205
+ maybe_mkdir_p(out)
206
+ maybe_mkdir_p(join(out, "imagesTr"))
207
+ maybe_mkdir_p(join(out, "imagesTs"))
208
+ maybe_mkdir_p(join(out, "labelsTr"))
209
+
210
+ for c in cases:
211
+ case_id = int(c.split("_")[-1])
212
+ if case_id < 210:
213
+ shutil.copy(join(base, c, "imaging.nii.gz"), join(out, "imagesTr", c + "_0000.nii.gz"))
214
+ shutil.copy(join(base, c, "segmentation.nii.gz"), join(out, "labelsTr", c + ".nii.gz"))
215
+ else:
216
+ shutil.copy(join(base, c, "imaging.nii.gz"), join(out, "imagesTs", c + "_0000.nii.gz"))
217
+
218
+ json_dict = {}
219
+ json_dict['name'] = "KiTS"
220
+ json_dict['description'] = "kidney and kidney tumor segmentation"
221
+ json_dict['tensorImageSize'] = "4D"
222
+ json_dict['reference'] = "KiTS data for nnunet"
223
+ json_dict['licence'] = ""
224
+ json_dict['release'] = "0.0"
225
+ json_dict['modality'] = {
226
+ "0": "CT",
227
+ }
228
+ json_dict['labels'] = {
229
+ "0": "background",
230
+ "1": "Kidney",
231
+ "2": "Tumor"
232
+ }
233
+ json_dict['numTraining'] = len(train_cases)
234
+ json_dict['numTest'] = 0
235
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
236
+ train_cases]
237
+ json_dict['test'] = []
238
+
239
+ save_json(json_dict, os.path.join(out, "dataset.json"))
240
+
nnunet/dataset_conversion/Task043_BraTS_2019.py ADDED
@@ -0,0 +1,164 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import numpy as np
17
+ from collections import OrderedDict
18
+
19
+ from batchgenerators.utilities.file_and_folder_operations import *
20
+ from nnunet.paths import nnUNet_raw_data
21
+ import SimpleITK as sitk
22
+ import shutil
23
+
24
+
25
+ def copy_BraTS_segmentation_and_convert_labels(in_file, out_file):
26
+ # use this for segmentation only!!!
27
+ # nnUNet wants the labels to be continuous. BraTS is 0, 1, 2, 4 -> we make that into 0, 1, 2, 3
28
+ img = sitk.ReadImage(in_file)
29
+ img_npy = sitk.GetArrayFromImage(img)
30
+
31
+ uniques = np.unique(img_npy)
32
+ for u in uniques:
33
+ if u not in [0, 1, 2, 4]:
34
+ raise RuntimeError('unexpected label')
35
+
36
+ seg_new = np.zeros_like(img_npy)
37
+ seg_new[img_npy == 4] = 3
38
+ seg_new[img_npy == 2] = 1
39
+ seg_new[img_npy == 1] = 2
40
+ img_corr = sitk.GetImageFromArray(seg_new)
41
+ img_corr.CopyInformation(img)
42
+ sitk.WriteImage(img_corr, out_file)
43
+
44
+
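+ # The main block below reminds us to convert labels back to the BraTS convention after prediction.
+ # A minimal sketch of that inverse mapping (a hypothetical helper, not part of the original file),
+ # derived from the forward mapping above (BraTS 4 -> 3, 2 -> 1, 1 -> 2):
+ def convert_labels_back_to_BraTS(in_file, out_file):
+ img = sitk.ReadImage(in_file)
+ img_npy = sitk.GetArrayFromImage(img)
+ seg_new = np.zeros_like(img_npy)
+ seg_new[img_npy == 3] = 4 # enhancing tumor
+ seg_new[img_npy == 2] = 1 # non-enhancing / necrosis
+ seg_new[img_npy == 1] = 2 # edema
+ img_corr = sitk.GetImageFromArray(seg_new)
+ img_corr.CopyInformation(img)
+ sitk.WriteImage(img_corr, out_file)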
45
+ if __name__ == "__main__":
46
+ """
47
+ REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
48
+ """
49
+
50
+ task_name = "Task043_BraTS2019"
51
+ downloaded_data_dir = "/home/sdp/MLPERF/Brats2019_DATA/MICCAI_BraTS_2019_Data_Training"
52
+
53
+ target_base = join(nnUNet_raw_data, task_name)
54
+ target_imagesTr = join(target_base, "imagesTr")
55
+ target_imagesVal = join(target_base, "imagesVal")
56
+ target_imagesTs = join(target_base, "imagesTs")
57
+ target_labelsTr = join(target_base, "labelsTr")
58
+
59
+ maybe_mkdir_p(target_imagesTr)
60
+ maybe_mkdir_p(target_imagesVal)
61
+ maybe_mkdir_p(target_imagesTs)
62
+ maybe_mkdir_p(target_labelsTr)
63
+
64
+ patient_names = []
65
+ for tpe in ["HGG", "LGG"]:
66
+ cur = join(downloaded_data_dir, tpe)
67
+ for p in subdirs(cur, join=False):
68
+ patdir = join(cur, p)
69
+ patient_name = tpe + "__" + p
70
+ patient_names.append(patient_name)
71
+ t1 = join(patdir, p + "_t1.nii.gz")
72
+ t1c = join(patdir, p + "_t1ce.nii.gz")
73
+ t2 = join(patdir, p + "_t2.nii.gz")
74
+ flair = join(patdir, p + "_flair.nii.gz")
75
+ seg = join(patdir, p + "_seg.nii.gz")
76
+
77
+ assert all([
78
+ isfile(t1),
79
+ isfile(t1c),
80
+ isfile(t2),
81
+ isfile(flair),
82
+ isfile(seg)
83
+ ]), "%s" % patient_name
84
+
85
+ shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
86
+ shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
87
+ shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
88
+ shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))
89
+
90
+ copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))
91
+
92
+
93
+ json_dict = OrderedDict()
94
+ json_dict['name'] = "BraTS2019"
95
+ json_dict['description'] = "nothing"
96
+ json_dict['tensorImageSize'] = "4D"
97
+ json_dict['reference'] = "see BraTS2019"
98
+ json_dict['licence'] = "see BraTS2019 license"
99
+ json_dict['release'] = "0.0"
100
+ json_dict['modality'] = {
101
+ "0": "T1",
102
+ "1": "T1ce",
103
+ "2": "T2",
104
+ "3": "FLAIR"
105
+ }
106
+ json_dict['labels'] = {
107
+ "0": "background",
108
+ "1": "edema",
109
+ "2": "non-enhancing",
110
+ "3": "enhancing",
111
+ }
112
+ json_dict['numTraining'] = len(patient_names)
113
+ json_dict['numTest'] = 0
114
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
115
+ patient_names]
116
+ json_dict['test'] = []
117
+
118
+ save_json(json_dict, join(target_base, "dataset.json"))
119
+
120
+ downloaded_data_dir = "/home/sdp/MLPERF/Brats2019_DATA/MICCAI_BraTS_2019_Data_Validation"
121
+
122
+ for p in subdirs(downloaded_data_dir, join=False):
123
+ patdir = join(downloaded_data_dir, p)
124
+ patient_name = p
125
+ t1 = join(patdir, p + "_t1.nii.gz")
126
+ t1c = join(patdir, p + "_t1ce.nii.gz")
127
+ t2 = join(patdir, p + "_t2.nii.gz")
128
+ flair = join(patdir, p + "_flair.nii.gz")
129
+
130
+ assert all([
131
+ isfile(t1),
132
+ isfile(t1c),
133
+ isfile(t2),
134
+ isfile(flair),
135
+ ]), "%s" % patient_name
136
+
137
+ shutil.copy(t1, join(target_imagesVal, patient_name + "_0000.nii.gz"))
138
+ shutil.copy(t1c, join(target_imagesVal, patient_name + "_0001.nii.gz"))
139
+ shutil.copy(t2, join(target_imagesVal, patient_name + "_0002.nii.gz"))
140
+ shutil.copy(flair, join(target_imagesVal, patient_name + "_0003.nii.gz"))
141
+
142
+ """
143
+ #I dont have the testing data
144
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Testing_FIsensee"
145
+
146
+ for p in subdirs(downloaded_data_dir, join=False):
147
+ patdir = join(downloaded_data_dir, p)
148
+ patient_name = p
149
+ t1 = join(patdir, p + "_t1.nii.gz")
150
+ t1c = join(patdir, p + "_t1ce.nii.gz")
151
+ t2 = join(patdir, p + "_t2.nii.gz")
152
+ flair = join(patdir, p + "_flair.nii.gz")
153
+
154
+ assert all([
155
+ isfile(t1),
156
+ isfile(t1c),
157
+ isfile(t2),
158
+ isfile(flair),
159
+ ]), "%s" % patient_name
160
+
161
+ shutil.copy(t1, join(target_imagesTs, patient_name + "_0000.nii.gz"))
162
+ shutil.copy(t1c, join(target_imagesTs, patient_name + "_0001.nii.gz"))
163
+ shutil.copy(t2, join(target_imagesTs, patient_name + "_0002.nii.gz"))
164
+ shutil.copy(flair, join(target_imagesTs, patient_name + "_0003.nii.gz"))"""
nnunet/dataset_conversion/Task055_SegTHOR.py ADDED
@@ -0,0 +1,98 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ from nnunet.paths import nnUNet_raw_data
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+ import SimpleITK as sitk
21
+
22
+
23
+ def convert_for_submission(source_dir, target_dir):
24
+ """
25
+ I believe they want .nii, not .nii.gz
26
+ :param source_dir:
27
+ :param target_dir:
28
+ :return:
29
+ """
30
+ files = subfiles(source_dir, suffix=".nii.gz", join=False)
31
+ maybe_mkdir_p(target_dir)
32
+ for f in files:
33
+ img = sitk.ReadImage(join(source_dir, f))
34
+ out_file = join(target_dir, f[:-7] + ".nii")
35
+ sitk.WriteImage(img, out_file)
36
+
37
+
38
+
39
+ if __name__ == "__main__":
40
+ base = "/media/fabian/DeepLearningData/SegTHOR"
41
+
42
+ task_id = 55
43
+ task_name = "SegTHOR"
44
+
45
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
46
+
47
+ out_base = join(nnUNet_raw_data, foldername)
48
+ imagestr = join(out_base, "imagesTr")
49
+ imagests = join(out_base, "imagesTs")
50
+ labelstr = join(out_base, "labelsTr")
51
+ maybe_mkdir_p(imagestr)
52
+ maybe_mkdir_p(imagests)
53
+ maybe_mkdir_p(labelstr)
54
+
55
+ train_patient_names = []
56
+ test_patient_names = []
57
+ train_patients = subfolders(join(base, "train"), join=False)
58
+ for p in train_patients:
59
+ curr = join(base, "train", p)
60
+ label_file = join(curr, "GT.nii.gz")
61
+ image_file = join(curr, p + ".nii.gz")
62
+ shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
63
+ shutil.copy(label_file, join(labelstr, p + ".nii.gz"))
64
+ train_patient_names.append(p)
65
+
66
+ test_patients = subfiles(join(base, "test"), join=False, suffix=".nii.gz")
67
+ for p in test_patients:
68
+ p = p[:-7]
69
+ curr = join(base, "test")
70
+ image_file = join(curr, p + ".nii.gz")
71
+ shutil.copy(image_file, join(imagests, p + "_0000.nii.gz"))
72
+ test_patient_names.append(p)
73
+
74
+
75
+ json_dict = OrderedDict()
76
+ json_dict['name'] = "SegTHOR"
77
+ json_dict['description'] = "SegTHOR"
78
+ json_dict['tensorImageSize'] = "4D"
79
+ json_dict['reference'] = "see challenge website"
80
+ json_dict['licence'] = "see challenge website"
81
+ json_dict['release'] = "0.0"
82
+ json_dict['modality'] = {
83
+ "0": "CT",
84
+ }
85
+ json_dict['labels'] = {
86
+ "0": "background",
87
+ "1": "esophagus",
88
+ "2": "heart",
89
+ "3": "trachea",
90
+ "4": "aorta",
91
+ }
92
+ json_dict['numTraining'] = len(train_patient_names)
93
+ json_dict['numTest'] = len(test_patient_names)
94
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
95
+ train_patient_names]
96
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
97
+
98
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task056_VerSe2019.py ADDED
@@ -0,0 +1,274 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ import SimpleITK as sitk
18
+ from multiprocessing.pool import Pool
19
+ from nnunet.configuration import default_num_threads
20
+ from nnunet.paths import nnUNet_raw_data
21
+ from batchgenerators.utilities.file_and_folder_operations import *
22
+ import shutil
23
+ from medpy import metric
24
+ import numpy as np
25
+ from nnunet.utilities.image_reorientation import reorient_all_images_in_folder_to_ras
26
+
27
+
28
+ def check_if_all_in_good_orientation(imagesTr_folder: str, labelsTr_folder: str, output_folder: str) -> None:
29
+ maybe_mkdir_p(output_folder)
30
+ filenames = subfiles(labelsTr_folder, suffix='.nii.gz', join=False)
31
+ import matplotlib.pyplot as plt
32
+ for n in filenames:
33
+ img = sitk.GetArrayFromImage(sitk.ReadImage(join(imagesTr_folder, n[:-7] + '_0000.nii.gz')))
34
+ lab = sitk.GetArrayFromImage(sitk.ReadImage(join(labelsTr_folder, n)))
35
+ assert np.all([i == j for i, j in zip(img.shape, lab.shape)])
36
+ z_slice = img.shape[0] // 2
37
+ img_slice = img[z_slice]
38
+ lab_slice = lab[z_slice]
39
+ lab_slice[lab_slice != 0] = 1
40
+ img_slice = img_slice - img_slice.min()
41
+ img_slice = img_slice / img_slice.max()
42
+ stacked = np.vstack((img_slice, lab_slice))
43
+ print(stacked.shape)
44
+ plt.imsave(join(output_folder, n[:-7] + '.png'), stacked, cmap='gray')
45
+
46
+
47
+ def evaluate_verse_case(sitk_file_ref:str, sitk_file_test:str):
48
+ """
49
+ Only vertebrae that are present in the reference will be evaluated
50
+ :param sitk_file_ref:
51
+ :param sitk_file_test:
52
+ :return:
53
+ """
54
+ gt_npy = sitk.GetArrayFromImage(sitk.ReadImage(sitk_file_ref))
55
+ pred_npy = sitk.GetArrayFromImage(sitk.ReadImage(sitk_file_test))
56
+ dice_scores = []
57
+ for label in range(1, 26):
58
+ mask_gt = gt_npy == label
59
+ if np.sum(mask_gt) > 0:
60
+ mask_pred = pred_npy == label
61
+ dc = metric.dc(mask_pred, mask_gt)
62
+ else:
63
+ dc = np.nan
64
+ dice_scores.append(dc)
65
+ return dice_scores
66
+
67
+
68
+ def evaluate_verse_folder(folder_pred, folder_gt, out_json="/home/fabian/verse.json"):
69
+ p = Pool(default_num_threads)
70
+ files_gt_bare = subfiles(folder_gt, join=False)
71
+ assert all([isfile(join(folder_pred, i)) for i in files_gt_bare]), "some files are missing in the predicted folder"
72
+ files_pred = [join(folder_pred, i) for i in files_gt_bare]
73
+ files_gt = [join(folder_gt, i) for i in files_gt_bare]
74
+
75
+ results = p.starmap_async(evaluate_verse_case, zip(files_gt, files_pred))
76
+
77
+ results = results.get()
78
+
79
+ dct = {i: j for i, j in zip(files_gt_bare, results)}
80
+
81
+ results_stacked = np.vstack(results)
82
+ results_mean = np.nanmean(results_stacked, 0)
83
+ overall_mean = np.nanmean(results_mean)
84
+
85
+ save_json((dct, list(results_mean), overall_mean), out_json)
86
+ p.close()
87
+ p.join()
88
+
89
+
90
+ def print_unique_labels_and_their_volumes(image: str, print_only_if_vol_smaller_than: float = None):
91
+ img = sitk.ReadImage(image)
92
+ voxel_volume = np.prod(img.GetSpacing())
93
+ img_npy = sitk.GetArrayFromImage(img)
94
+ uniques = [i for i in np.unique(img_npy) if i != 0]
95
+ volumes = {i: np.sum(img_npy == i) * voxel_volume for i in uniques}
96
+ print('')
97
+ print(image.split('/')[-1])
98
+ print('uniques:', uniques)
99
+ for k in volumes.keys():
100
+ v = volumes[k]
101
+ if print_only_if_vol_smaller_than is not None and v > print_only_if_vol_smaller_than:
102
+ pass
103
+ else:
104
+ print('k:', k, '\tvol:', volumes[k])
105
+
106
+
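+ # Worked example for the volume computation above (not part of the original file): with a voxel
+ # spacing of (0.8, 0.8, 3.0) mm, voxel_volume = np.prod((0.8, 0.8, 3.0)) = 1.92 mm^3, so a label
+ # covering 5 voxels has a volume of 9.6 mm^3 -- small enough to be flagged as spurious by the
+ # 1000 mm^3 threshold used in the main block below.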
107
+ def remove_label(label_file: str, remove_this: int, replace_with: int = 0):
108
+ img = sitk.ReadImage(label_file)
109
+ img_npy = sitk.GetArrayFromImage(img)
110
+ img_npy[img_npy == remove_this] = replace_with
111
+ img2 = sitk.GetImageFromArray(img_npy)
112
+ img2.CopyInformation(img)
113
+ sitk.WriteImage(img2, label_file)
114
+
115
+
116
+ if __name__ == "__main__":
117
+ ### First we create a nnunet dataset from verse. After this the images will be all willy nilly in their
118
+ # orientation because that's how VerSe comes
119
+ # base = '/media/fabian/DeepLearningData/VerSe2019'  # unused, overridden by the next line
120
+ base = "/home/fabian/data/VerSe2019"
121
+
122
+ # correct orientation
123
+ train_files_base = subfiles(join(base, "train"), join=False, suffix="_seg.nii.gz")
124
+ train_segs = [i[:-len("_seg.nii.gz")] + "_seg.nii.gz" for i in train_files_base]
125
+ train_data = [i[:-len("_seg.nii.gz")] + ".nii.gz" for i in train_files_base]
126
+ test_files_base = [i[:-len(".nii.gz")] for i in subfiles(join(base, "test"), join=False, suffix=".nii.gz")]
127
+ test_data = [i + ".nii.gz" for i in test_files_base]
128
+
129
+ task_id = 56
130
+ task_name = "VerSe"
131
+
132
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
133
+
134
+ out_base = join(nnUNet_raw_data, foldername)
135
+ imagestr = join(out_base, "imagesTr")
136
+ imagests = join(out_base, "imagesTs")
137
+ labelstr = join(out_base, "labelsTr")
138
+ maybe_mkdir_p(imagestr)
139
+ maybe_mkdir_p(imagests)
140
+ maybe_mkdir_p(labelstr)
141
+
142
+ train_patient_names = [i[:-len("_seg.nii.gz")] for i in subfiles(join(base, "train"), join=False, suffix="_seg.nii.gz")]
143
+ for p in train_patient_names:
144
+ curr = join(base, "train")
145
+ label_file = join(curr, p + "_seg.nii.gz")
146
+ image_file = join(curr, p + ".nii.gz")
147
+ shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
148
+ shutil.copy(label_file, join(labelstr, p + ".nii.gz"))
149
+
150
+ test_patient_names = [i[:-7] for i in subfiles(join(base, "test"), join=False, suffix=".nii.gz")]
151
+ for p in test_patient_names:
152
+ curr = join(base, "test")
153
+ image_file = join(curr, p + ".nii.gz")
154
+ shutil.copy(image_file, join(imagests, p + "_0000.nii.gz"))
155
+
156
+
157
+ json_dict = OrderedDict()
158
+ json_dict['name'] = "VerSe2019"
159
+ json_dict['description'] = "VerSe2019"
160
+ json_dict['tensorImageSize'] = "4D"
161
+ json_dict['reference'] = "see challenge website"
162
+ json_dict['licence'] = "see challenge website"
163
+ json_dict['release'] = "0.0"
164
+ json_dict['modality'] = {
165
+ "0": "CT",
166
+ }
167
+ json_dict['labels'] = {i: str(i) for i in range(26)}
168
+
169
+ json_dict['numTraining'] = len(train_patient_names)
170
+ json_dict['numTest'] = len(test_patient_names)
171
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
172
+ train_patient_names]
173
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
174
+
175
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
176
+
177
+ # now we reorient all those images to ras. This saves a pkl with the original affine. We need this information to
178
+ # bring our predictions into the same geometry for submission
179
+ reorient_all_images_in_folder_to_ras(imagestr)
180
+ reorient_all_images_in_folder_to_ras(imagests)
181
+ reorient_all_images_in_folder_to_ras(labelstr)
182
+
183
+ # sanity check
184
+ check_if_all_in_good_orientation(imagestr, labelstr, join(out_base, 'sanitycheck'))
185
+ # looks good to me - proceed
186
+
187
+ # check the volumes of the vertebrae
188
+ _ = [print_unique_labels_and_their_volumes(i, 1000) for i in subfiles(labelstr, suffix='.nii.gz')]
189
+
190
+ # some cases appear fishy. For example, verse063.nii.gz has labels [1, 20, 21, 22, 23, 24] and 1 only has a volume
191
+ # of 63mm^3
192
+
193
+ #let's correct those
194
+
195
+ # 19 is connected to the image border and should not be segmented. Only one slice of 19 is segmented in the
196
+ # reference. Looks wrong
197
+ remove_label(join(labelstr, 'verse031.nii.gz'), 19, 0)
198
+
199
+ # spurious annotation of 18 (vol: 8.00)
200
+ remove_label(join(labelstr, 'verse060.nii.gz'), 18, 0)
201
+
202
+ # spurious annotation of 16 (vol: 3.00)
203
+ remove_label(join(labelstr, 'verse061.nii.gz'), 16, 0)
204
+
205
+ # spurious annotation of 1 (vol: 63.00) although the rest of the vertebra is [20, 21, 22, 23, 24]
206
+ remove_label(join(labelstr, 'verse063.nii.gz'), 1, 0)
207
+
208
+ # spurious annotation of 3 (vol: 9.53) although the rest of the vertebra is
209
+ # [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
210
+ remove_label(join(labelstr, 'verse074.nii.gz'), 3, 0)
211
+
212
+ # spurious annotation of 3 (vol: 15.00)
213
+ remove_label(join(labelstr, 'verse097.nii.gz'), 3, 0)
214
+
215
+ # spurious annotation of 3 (vol: 10) although the rest of the vertebra is
216
+ # [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
217
+ remove_label(join(labelstr, 'verse151.nii.gz'), 3, 0)
218
+
219
+ # spurious annotation of 25 (vol: 4) although the rest of the vertebra is
220
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
221
+ remove_label(join(labelstr, 'verse201.nii.gz'), 25, 0)
222
+
223
+ # spurious annotation of 23 (vol: 8) although the rest of the vertebra is
224
+ # [1, 2, 3, 4, 5, 6, 7, 8]
225
+ remove_label(join(labelstr, 'verse207.nii.gz'), 23, 0)
226
+
227
+ # spurious annotation of 23 (vol: 12) although the rest of the vertebra is
228
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
229
+ remove_label(join(labelstr, 'verse208.nii.gz'), 23, 0)
230
+
231
+ # spurious annotation of 23 (vol: 2) although the rest of the vertebra is
232
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
233
+ remove_label(join(labelstr, 'verse212.nii.gz'), 23, 0)
234
+
235
+ # spurious annotation of 20 (vol: 4) although the rest of the vertebra is
236
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
237
+ remove_label(join(labelstr, 'verse214.nii.gz'), 20, 0)
238
+
239
+ # spurious annotation of 23 (vol: 15) although the rest of the vertebra is
240
+ # [1, 2, 3, 4, 5, 6, 7, 8]
241
+ remove_label(join(labelstr, 'verse223.nii.gz'), 23, 0)
242
+
243
+ # spurious annotation of 23 (vol: 1) and 25 (vol: 7) although the rest of the vertebra is
244
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
245
+ remove_label(join(labelstr, 'verse226.nii.gz'), 23, 0)
246
+ remove_label(join(labelstr, 'verse226.nii.gz'), 25, 0)
247
+
248
+ # spurious annotation of 25 (vol: 27) although the rest of the vertebra is
249
+ # [1, 2, 3, 4, 5, 6, 7, 8]
250
+ remove_label(join(labelstr, 'verse227.nii.gz'), 25, 0)
251
+
252
+ # spurious annotation of 20 (vol: 24) although the rest of the vertebra is
253
+ # [1, 2, 3, 4, 5, 6, 7, 8]
254
+ remove_label(join(labelstr, 'verse232.nii.gz'), 20, 0)
255
+
256
+
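+     # for reference, remove_label (presumably defined further up in this script, outside this excerpt)
+     # conceptually does the following (sketch assuming a SimpleITK round trip):
+     #
+     #   def remove_label(label_file, label, replace_with):
+     #       itk = sitk.ReadImage(label_file)
+     #       npy = sitk.GetArrayFromImage(itk)
+     #       npy[npy == label] = replace_with
+     #       out = sitk.GetImageFromArray(npy)
+     #       out.CopyInformation(itk)
+     #       sitk.WriteImage(out, label_file)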
257
+ # Now we are ready to run nnU-Net
258
+
259
+
260
+ """# run this part of the code once training is done
261
+ folder_gt = "/media/fabian/My Book/MedicalDecathlon/nnUNet_raw_splitted/Task056_VerSe/labelsTr"
262
+
263
+ folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_fullres/Task056_VerSe/nnUNetTrainerV2__nnUNetPlansv2.1/cv_niftis_raw"
264
+ out_json = "/home/fabian/Task056_VerSe_3d_fullres_summary.json"
265
+ evaluate_verse_folder(folder_pred, folder_gt, out_json)
266
+
267
+ folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_lowres/Task056_VerSe/nnUNetTrainerV2__nnUNetPlansv2.1/cv_niftis_raw"
268
+ out_json = "/home/fabian/Task056_VerSe_3d_lowres_summary.json"
269
+ evaluate_verse_folder(folder_pred, folder_gt, out_json)
270
+
271
+ folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_cascade_fullres/Task056_VerSe/nnUNetTrainerV2CascadeFullRes__nnUNetPlansv2.1/cv_niftis_raw"
272
+ out_json = "/home/fabian/Task056_VerSe_3d_cascade_fullres_summary.json"
273
+ evaluate_verse_folder(folder_pred, folder_gt, out_json)"""
274
+
nnunet/dataset_conversion/Task056_Verse_normalize_orientation.py ADDED
@@ -0,0 +1,98 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ """
17
+ This code is copied from https://gist.github.com/nlessmann/24d405eaa82abba6676deb6be839266c. All credits go to the
18
+ original author (user nlessmann on GitHub)
19
+ """
20
+
21
+ import numpy as np
22
+ import SimpleITK as sitk
23
+
24
+
25
+ def reverse_axes(image):
26
+ return np.transpose(image, tuple(reversed(range(image.ndim))))
27
+
28
+
29
+ def read_image(imagefile):
30
+ image = sitk.ReadImage(imagefile)
31
+ data = reverse_axes(sitk.GetArrayFromImage(image)) # switch from zyx to xyz
32
+ header = {
33
+ 'spacing': image.GetSpacing(),
34
+ 'origin': image.GetOrigin(),
35
+ 'direction': image.GetDirection()
36
+ }
37
+ return data, header
38
+
39
+
40
+ def save_image(img: np.ndarray, header: dict, output_file: str):
41
+ """
42
+ CAREFUL you need to restore_original_slice_orientation before saving!
43
+ :param img:
44
+ :param header:
45
+ :return:
46
+ """
47
+     # reverse back
+     img = reverse_axes(img)  # switch back from xyz to zyx
49
+ img_itk = sitk.GetImageFromArray(img)
50
+ img_itk.SetSpacing(header['spacing'])
51
+ img_itk.SetOrigin(header['origin'])
52
+ if not isinstance(header['direction'], tuple):
53
+ img_itk.SetDirection(header['direction'].flatten())
54
+ else:
55
+ img_itk.SetDirection(header['direction'])
56
+
57
+ sitk.WriteImage(img_itk, output_file)
58
+
59
+
60
+ def swap_flip_dimensions(cosine_matrix, image, header=None):
61
+ # Compute swaps and flips
62
+ swap = np.argmax(abs(cosine_matrix), axis=0)
63
+ flip = np.sum(cosine_matrix, axis=0)
64
+
65
+ # Apply transformation to image volume
66
+ image = np.transpose(image, tuple(swap))
67
+ image = image[tuple(slice(None, None, int(f)) for f in flip)]
68
+
69
+ if header is None:
70
+ return image
71
+
72
+ # Apply transformation to header
73
+ header['spacing'] = tuple(header['spacing'][s] for s in swap)
74
+ header['direction'] = np.eye(3)
75
+
76
+ return image, header
77
+
78
+
79
+ def normalize_slice_orientation(image, header):
80
+ # Preserve original header so that we can easily transform back
81
+ header['original'] = header.copy()
82
+
83
+ # Compute inverse of cosine (round first because we assume 0/1 values only)
84
+ # to determine how the image has to be transposed and flipped for cosine = identity
85
+ cosine = np.asarray(header['direction']).reshape(3, 3)
86
+ cosine_inv = np.linalg.inv(np.round(cosine))
87
+
88
+ return swap_flip_dimensions(cosine_inv, image, header)
89
+
90
+
91
+ def restore_original_slice_orientation(mask, header):
92
+     # Use the original orientation for the transformation because we assume the mask to be in
+     # normalized orientation (i.e., identity cosine matrix)
94
+ cosine = np.asarray(header['original']['direction']).reshape(3, 3)
95
+ cosine_rnd = np.round(cosine)
96
+
97
+ # Apply transformations to both the image and the mask
98
+ return swap_flip_dimensions(cosine_rnd, mask), header['original']
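+
+
+ if __name__ == '__main__':
+     # hedged usage example of the round trip above; 'verse001.nii.gz' is a placeholder file name
+     data, header = read_image('verse001.nii.gz')
+     data, header = normalize_slice_orientation(data, header)
+     # ... run a model on `data` in normalized orientation to obtain `mask` ...
+     mask = data  # placeholder so this sketch is self-contained
+     mask, original_header = restore_original_slice_orientation(mask, header)
+     save_image(mask, original_header, 'verse001_restored.nii.gz')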
nnunet/dataset_conversion/Task058_ISBI_EM_SEG.py ADDED
@@ -0,0 +1,105 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+
18
+ import SimpleITK as sitk
19
+ import numpy as np
20
+ from batchgenerators.utilities.file_and_folder_operations import *
21
+ from nnunet.paths import nnUNet_raw_data
22
+ from skimage import io
23
+
24
+
25
+ def export_for_submission(predicted_npz, out_file):
26
+ """
27
+ they expect us to submit a 32 bit 3d tif image with values between 0 (100% membrane certainty) and 1
28
+ (100% non-membrane certainty). We use the softmax output for that
29
+ :return:
30
+ """
31
+ a = np.load(predicted_npz)['softmax']
32
+ a = a / a.sum(0)[None]
33
+ # channel 0 is non-membrane prob
34
+ nonmembr_prob = a[0]
35
+ assert out_file.endswith(".tif")
36
+ io.imsave(out_file, nonmembr_prob.astype(np.float32))
37
+
38
+
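+ # usage sketch (hypothetical paths): after predicting the test volume with softmax export enabled, convert the
+ # saved softmax to the submission tif:
+ #
+ #   export_for_submission('OUTPUT_FOLDER/testing.npz', '/tmp/isbi_submission.tif')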
39
+
40
+ if __name__ == "__main__":
41
+ # download from here http://brainiac2.mit.edu/isbi_challenge/downloads
42
+
43
+ base = "/media/fabian/My Book/datasets/ISBI_EM_SEG"
44
+ # the orientation of VerSe is all fing over the place. run fslreorient2std to correct that (hopefully!)
45
+ # THIS CAN HAVE CONSEQUENCES FOR THE TEST SET SUBMISSION! CAREFUL!
46
+ train_volume = io.imread(join(base, "train-volume.tif"))
47
+ train_labels = io.imread(join(base, "train-labels.tif"))
48
+ train_labels[train_labels == 255] = 1
49
+ test_volume = io.imread(join(base, "test-volume.tif"))
50
+
51
+ task_id = 58
52
+ task_name = "ISBI_EM_SEG"
53
+
54
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
55
+
56
+ out_base = join(nnUNet_raw_data, foldername)
57
+ imagestr = join(out_base, "imagesTr")
58
+ imagests = join(out_base, "imagesTs")
59
+ labelstr = join(out_base, "labelsTr")
60
+ maybe_mkdir_p(imagestr)
61
+ maybe_mkdir_p(imagests)
62
+ maybe_mkdir_p(labelstr)
63
+
64
+ img_tr_itk = sitk.GetImageFromArray(train_volume.astype(np.float32))
65
+ lab_tr_itk = sitk.GetImageFromArray(1 - train_labels) # walls are foreground, cells background
66
+ img_te_itk = sitk.GetImageFromArray(test_volume.astype(np.float32))
67
+
68
+ img_tr_itk.SetSpacing((4, 4, 50))
69
+ lab_tr_itk.SetSpacing((4, 4, 50))
70
+ img_te_itk.SetSpacing((4, 4, 50))
71
+
72
+ # 5 copies, otherwise we cannot run nnunet (5 fold cv needs that)
73
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training0_0000.nii.gz"))
74
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training1_0000.nii.gz"))
75
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training2_0000.nii.gz"))
76
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training3_0000.nii.gz"))
77
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training4_0000.nii.gz"))
78
+
79
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training0.nii.gz"))
80
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training1.nii.gz"))
81
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training2.nii.gz"))
82
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training3.nii.gz"))
83
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training4.nii.gz"))
84
+
85
+ sitk.WriteImage(img_te_itk, join(imagests, "testing.nii.gz"))
86
+
87
+ json_dict = OrderedDict()
88
+ json_dict['name'] = task_name
89
+ json_dict['description'] = task_name
90
+ json_dict['tensorImageSize'] = "4D"
91
+ json_dict['reference'] = "see challenge website"
92
+ json_dict['licence'] = "see challenge website"
93
+ json_dict['release'] = "0.0"
94
+ json_dict['modality'] = {
95
+ "0": "EM",
96
+ }
97
+ json_dict['labels'] = {i: str(i) for i in range(2)}
98
+
99
+ json_dict['numTraining'] = 5
100
+ json_dict['numTest'] = 1
101
+ json_dict['training'] = [{'image': "./imagesTr/training%d.nii.gz" % i, "label": "./labelsTr/training%d.nii.gz" % i} for i in
102
+ range(5)]
103
+ json_dict['test'] = ["./imagesTs/testing.nii.gz"]
104
+
105
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task059_EPFL_EM_MITO_SEG.py ADDED
@@ -0,0 +1,99 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import numpy as np
+ from collections import OrderedDict
+ from nnunet.paths import nnUNet_raw_data
+ from batchgenerators.utilities.file_and_folder_operations import *
+ import shutil
+ from skimage import io
+ import SimpleITK as sitk
25
+
26
+
27
+ if __name__ == "__main__":
28
+ # download from here https://www.epfl.ch/labs/cvlab/data/data-em/
29
+
30
+ base = "/media/fabian/My Book/datasets/EPFL_MITO_SEG"
31
+ # the orientation of VerSe is all fing over the place. run fslreorient2std to correct that (hopefully!)
32
+ # THIS CAN HAVE CONSEQUENCES FOR THE TEST SET SUBMISSION! CAREFUL!
33
+ train_volume = io.imread(join(base, "training.tif"))
34
+ train_labels = io.imread(join(base, "training_groundtruth.tif"))
35
+ train_labels[train_labels == 255] = 1
36
+ test_volume = io.imread(join(base, "testing.tif"))
37
+ test_labels = io.imread(join(base, "testing_groundtruth.tif"))
38
+ test_labels[test_labels == 255] = 1
39
+
40
+ task_id = 59
41
+ task_name = "EPFL_EM_MITO_SEG"
42
+
43
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
44
+
45
+ out_base = join(nnUNet_raw_data, foldername)
46
+ imagestr = join(out_base, "imagesTr")
47
+ imagests = join(out_base, "imagesTs")
48
+ labelstr = join(out_base, "labelsTr")
49
+ labelste = join(out_base, "labelsTs")
50
+ maybe_mkdir_p(imagestr)
51
+ maybe_mkdir_p(imagests)
52
+ maybe_mkdir_p(labelstr)
53
+ maybe_mkdir_p(labelste)
54
+
55
+ img_tr_itk = sitk.GetImageFromArray(train_volume.astype(np.float32))
56
+ lab_tr_itk = sitk.GetImageFromArray(train_labels.astype(np.uint8))
57
+ img_te_itk = sitk.GetImageFromArray(test_volume.astype(np.float32))
58
+ lab_te_itk = sitk.GetImageFromArray(test_labels.astype(np.uint8))
59
+
60
+ img_tr_itk.SetSpacing((5, 5, 5))
61
+ lab_tr_itk.SetSpacing((5, 5, 5))
62
+ img_te_itk.SetSpacing((5, 5, 5))
63
+ lab_te_itk.SetSpacing((5, 5, 5))
64
+
65
+ # 5 copies, otherwise we cannot run nnunet (5 fold cv needs that)
66
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training0_0000.nii.gz"))
67
+ shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training1_0000.nii.gz"))
68
+ shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training2_0000.nii.gz"))
69
+ shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training3_0000.nii.gz"))
70
+ shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training4_0000.nii.gz"))
71
+
72
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training0.nii.gz"))
73
+ shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training1.nii.gz"))
74
+ shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training2.nii.gz"))
75
+ shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training3.nii.gz"))
76
+ shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training4.nii.gz"))
77
+
78
+ sitk.WriteImage(img_te_itk, join(imagests, "testing.nii.gz"))
79
+ sitk.WriteImage(lab_te_itk, join(labelste, "testing.nii.gz"))
80
+
81
+ json_dict = OrderedDict()
82
+ json_dict['name'] = task_name
83
+ json_dict['description'] = task_name
84
+ json_dict['tensorImageSize'] = "4D"
85
+ json_dict['reference'] = "see challenge website"
86
+ json_dict['licence'] = "see challenge website"
87
+ json_dict['release'] = "0.0"
88
+ json_dict['modality'] = {
89
+ "0": "EM",
90
+ }
91
+ json_dict['labels'] = {i: str(i) for i in range(2)}
92
+
93
+ json_dict['numTraining'] = 5
94
+ json_dict['numTest'] = 1
95
+ json_dict['training'] = [{'image': "./imagesTr/training%d.nii.gz" % i, "label": "./labelsTr/training%d.nii.gz" % i} for i in
96
+ range(5)]
97
+ json_dict['test'] = ["./imagesTs/testing.nii.gz"]
98
+
99
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task061_CREMI.py ADDED
@@ -0,0 +1,146 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import numpy as np
20
+ from nnunet.paths import nnUNet_raw_data, preprocessing_output_dir
21
+ import shutil
22
+ import SimpleITK as sitk
23
+
24
+ try:
25
+ import h5py
26
+ except ImportError:
27
+ h5py = None
28
+
29
+
30
+ def load_sample(filename):
31
+ # we need raw data and seg
32
+ f = h5py.File(filename, 'r')
33
+ data = np.array(f['volumes']['raw'])
34
+
35
+ if 'labels' in f['volumes'].keys():
36
+ labels = np.array(f['volumes']['labels']['clefts'])
37
+ # clefts are low values, background is high
38
+ labels = (labels < 100000).astype(np.uint8)
39
+ else:
40
+ labels = None
41
+ return data, labels
42
+
43
+
44
+ def save_as_nifti(arr, filename, spacing):
45
+ itk_img = sitk.GetImageFromArray(arr)
46
+ itk_img.SetSpacing(spacing)
47
+ sitk.WriteImage(itk_img, filename)
48
+
49
+
50
+ def prepare_submission():
51
+ from cremi.io import CremiFile
52
+ from cremi.Volume import Volume
53
+
54
+ base = "/home/fabian/drives/datasets/results/nnUNet/test_sets/Task061_CREMI/"
55
+ # a+
56
+ pred = sitk.GetArrayFromImage(sitk.ReadImage(join(base, 'results_3d_fullres', "sample_a+.nii.gz"))).astype(np.uint64)
57
+ pred[pred == 0] = 0xffffffffffffffff
58
+ out_a = CremiFile(join(base, 'sample_A+_20160601.hdf'), 'w')
59
+ clefts = Volume(pred, (40., 4., 4.))
60
+ out_a.write_clefts(clefts)
61
+ out_a.close()
62
+
63
+ pred = sitk.GetArrayFromImage(sitk.ReadImage(join(base, 'results_3d_fullres', "sample_b+.nii.gz"))).astype(np.uint64)
64
+ pred[pred == 0] = 0xffffffffffffffff
65
+ out_b = CremiFile(join(base, 'sample_B+_20160601.hdf'), 'w')
66
+ clefts = Volume(pred, (40., 4., 4.))
67
+ out_b.write_clefts(clefts)
68
+ out_b.close()
69
+
70
+ pred = sitk.GetArrayFromImage(sitk.ReadImage(join(base, 'results_3d_fullres', "sample_c+.nii.gz"))).astype(np.uint64)
71
+ pred[pred == 0] = 0xffffffffffffffff
72
+ out_c = CremiFile(join(base, 'sample_C+_20160601.hdf'), 'w')
73
+ clefts = Volume(pred, (40., 4., 4.))
74
+ out_c.write_clefts(clefts)
75
+ out_c.close()
76
+
77
+
78
+ if __name__ == "__main__":
79
+ assert h5py is not None, "you need h5py for this. Install with 'pip install h5py'"
80
+
81
+ foldername = "Task061_CREMI"
82
+ out_base = join(nnUNet_raw_data, foldername)
83
+ imagestr = join(out_base, "imagesTr")
84
+ imagests = join(out_base, "imagesTs")
85
+ labelstr = join(out_base, "labelsTr")
86
+ maybe_mkdir_p(imagestr)
87
+ maybe_mkdir_p(imagests)
88
+ maybe_mkdir_p(labelstr)
89
+
90
+ base = "/media/fabian/My Book/datasets/CREMI"
91
+
92
+ # train
93
+ img, label = load_sample(join(base, "sample_A_20160501.hdf"))
94
+ save_as_nifti(img, join(imagestr, "sample_a_0000.nii.gz"), (4, 4, 40))
95
+ save_as_nifti(label, join(labelstr, "sample_a.nii.gz"), (4, 4, 40))
96
+ img, label = load_sample(join(base, "sample_B_20160501.hdf"))
97
+ save_as_nifti(img, join(imagestr, "sample_b_0000.nii.gz"), (4, 4, 40))
98
+ save_as_nifti(label, join(labelstr, "sample_b.nii.gz"), (4, 4, 40))
99
+ img, label = load_sample(join(base, "sample_C_20160501.hdf"))
100
+ save_as_nifti(img, join(imagestr, "sample_c_0000.nii.gz"), (4, 4, 40))
101
+ save_as_nifti(label, join(labelstr, "sample_c.nii.gz"), (4, 4, 40))
102
+
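+     # sample_c is duplicated as sample_d and sample_e below so that there are 5 training cases in total
+     # (nnU-Net's 5-fold cross-validation needs at least 5); the manual splits at the bottom of this file
+     # override the folds anyway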
103
+ save_as_nifti(img, join(imagestr, "sample_d_0000.nii.gz"), (4, 4, 40))
104
+ save_as_nifti(label, join(labelstr, "sample_d.nii.gz"), (4, 4, 40))
105
+
106
+ save_as_nifti(img, join(imagestr, "sample_e_0000.nii.gz"), (4, 4, 40))
107
+ save_as_nifti(label, join(labelstr, "sample_e.nii.gz"), (4, 4, 40))
108
+
109
+ # test
110
+ img, label = load_sample(join(base, "sample_A+_20160601.hdf"))
111
+ save_as_nifti(img, join(imagests, "sample_a+_0000.nii.gz"), (4, 4, 40))
112
+ img, label = load_sample(join(base, "sample_B+_20160601.hdf"))
113
+ save_as_nifti(img, join(imagests, "sample_b+_0000.nii.gz"), (4, 4, 40))
114
+ img, label = load_sample(join(base, "sample_C+_20160601.hdf"))
115
+ save_as_nifti(img, join(imagests, "sample_c+_0000.nii.gz"), (4, 4, 40))
116
+
117
+ json_dict = OrderedDict()
118
+ json_dict['name'] = foldername
119
+ json_dict['description'] = foldername
120
+ json_dict['tensorImageSize'] = "4D"
121
+ json_dict['reference'] = "see challenge website"
122
+ json_dict['licence'] = "see challenge website"
123
+ json_dict['release'] = "0.0"
124
+ json_dict['modality'] = {
125
+ "0": "EM",
126
+ }
127
+ json_dict['labels'] = {i: str(i) for i in range(2)}
128
+
129
+ json_dict['numTraining'] = 5
130
+     json_dict['numTest'] = 3
131
+ json_dict['training'] = [{'image': "./imagesTr/sample_%s.nii.gz" % i, "label": "./labelsTr/sample_%s.nii.gz" % i} for i in
132
+ ['a', 'b', 'c', 'd', 'e']]
133
+
134
+ json_dict['test'] = ["./imagesTs/sample_a+.nii.gz", "./imagesTs/sample_b+.nii.gz", "./imagesTs/sample_c+.nii.gz"]
135
+
136
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
137
+
138
+ out_preprocessed = join(preprocessing_output_dir, foldername)
139
+ maybe_mkdir_p(out_preprocessed)
140
+ # manual splits. we train 5 models on all three datasets
141
+ splits = [{'train': ["sample_a", "sample_b", "sample_c"], 'val': ["sample_a", "sample_b", "sample_c"]},
142
+ {'train': ["sample_a", "sample_b", "sample_c"], 'val': ["sample_a", "sample_b", "sample_c"]},
143
+ {'train': ["sample_a", "sample_b", "sample_c"], 'val': ["sample_a", "sample_b", "sample_c"]},
144
+ {'train': ["sample_a", "sample_b", "sample_c"], 'val': ["sample_a", "sample_b", "sample_c"]},
145
+ {'train': ["sample_a", "sample_b", "sample_c"], 'val': ["sample_a", "sample_b", "sample_c"]}]
146
+ save_pickle(splits, join(out_preprocessed, "splits_final.pkl"))
nnunet/dataset_conversion/Task062_NIHPancreas.py ADDED
@@ -0,0 +1,89 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ from nnunet.paths import nnUNet_raw_data
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+ from multiprocessing import Pool
21
+ import nibabel
22
+
23
+
24
+ def reorient(filename):
25
+ img = nibabel.load(filename)
26
+ img = nibabel.as_closest_canonical(img)
27
+ nibabel.save(img, filename)
28
+
29
+
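+ # nibabel's as_closest_canonical reorders/flips the voxel axes so that the affine is as close as possible to
+ # RAS+; e.g. nibabel.aff2axcodes(nibabel.as_closest_canonical(img).affine) yields ('R', 'A', 'S')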
30
+ if __name__ == "__main__":
31
+ base = "/media/fabian/DeepLearningData/Pancreas-CT"
32
+
33
+ # reorient
34
+ p = Pool(8)
35
+ results = []
36
+
37
+ for f in subfiles(join(base, "data"), suffix=".nii.gz"):
38
+ results.append(p.map_async(reorient, (f, )))
39
+ _ = [i.get() for i in results]
40
+
41
+ for f in subfiles(join(base, "TCIA_pancreas_labels-02-05-2017"), suffix=".nii.gz"):
42
+ results.append(p.map_async(reorient, (f, )))
43
+ _ = [i.get() for i in results]
44
+
45
+ task_id = 62
46
+ task_name = "NIHPancreas"
47
+
48
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
49
+
50
+ out_base = join(nnUNet_raw_data, foldername)
51
+ imagestr = join(out_base, "imagesTr")
52
+ imagests = join(out_base, "imagesTs")
53
+ labelstr = join(out_base, "labelsTr")
54
+ maybe_mkdir_p(imagestr)
55
+ maybe_mkdir_p(imagests)
56
+ maybe_mkdir_p(labelstr)
57
+
58
+ train_patient_names = []
59
+ test_patient_names = []
60
+ cases = list(range(1, 83))
61
+ folder_data = join(base, "data")
62
+ folder_labels = join(base, "TCIA_pancreas_labels-02-05-2017")
63
+ for c in cases:
64
+ casename = "pancreas_%04.0d" % c
65
+ shutil.copy(join(folder_data, "PANCREAS_%04.0d.nii.gz" % c), join(imagestr, casename + "_0000.nii.gz"))
66
+ shutil.copy(join(folder_labels, "label%04.0d.nii.gz" % c), join(labelstr, casename + ".nii.gz"))
67
+ train_patient_names.append(casename)
68
+
69
+ json_dict = OrderedDict()
70
+ json_dict['name'] = task_name
71
+ json_dict['description'] = task_name
72
+ json_dict['tensorImageSize'] = "4D"
73
+ json_dict['reference'] = "see website"
74
+ json_dict['licence'] = "see website"
75
+ json_dict['release'] = "0.0"
76
+ json_dict['modality'] = {
77
+ "0": "CT",
78
+ }
79
+ json_dict['labels'] = {
80
+ "0": "background",
81
+ "1": "Pancreas",
82
+ }
83
+ json_dict['numTraining'] = len(train_patient_names)
84
+ json_dict['numTest'] = len(test_patient_names)
85
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
86
+ train_patient_names]
87
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
88
+
89
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task064_KiTS_labelsFixed.py ADDED
@@ -0,0 +1,84 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import shutil
17
+ from batchgenerators.utilities.file_and_folder_operations import *
18
+ from nnunet.paths import nnUNet_raw_data
19
+
20
+
21
+ if __name__ == "__main__":
22
+ """
23
+ This is the KiTS dataset after Nick fixed all the labels that had errors. Downloaded on Jan 6th 2020
24
+ """
25
+
26
+ base = "/media/fabian/My Book/datasets/KiTS_clean/kits19/data"
27
+
28
+ task_id = 64
29
+ task_name = "KiTS_labelsFixed"
30
+
31
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
32
+
33
+ out_base = join(nnUNet_raw_data, foldername)
34
+ imagestr = join(out_base, "imagesTr")
35
+ imagests = join(out_base, "imagesTs")
36
+ labelstr = join(out_base, "labelsTr")
37
+ maybe_mkdir_p(imagestr)
38
+ maybe_mkdir_p(imagests)
39
+ maybe_mkdir_p(labelstr)
40
+
41
+ train_patient_names = []
42
+ test_patient_names = []
43
+ all_cases = subfolders(base, join=False)
44
+
45
+ train_patients = all_cases[:210]
46
+ test_patients = all_cases[210:]
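+     # KiTS19 ships 300 cases of which the first 210 (case_00000-case_00209) come with segmentations; the
+     # remaining 90 are the official test set, hence the split at index 210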
47
+
48
+ for p in train_patients:
49
+ curr = join(base, p)
50
+ label_file = join(curr, "segmentation.nii.gz")
51
+ image_file = join(curr, "imaging.nii.gz")
52
+ shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
53
+ shutil.copy(label_file, join(labelstr, p + ".nii.gz"))
54
+ train_patient_names.append(p)
55
+
56
+ for p in test_patients:
57
+ curr = join(base, p)
58
+ image_file = join(curr, "imaging.nii.gz")
59
+ shutil.copy(image_file, join(imagests, p + "_0000.nii.gz"))
60
+ test_patient_names.append(p)
61
+
62
+ json_dict = {}
63
+ json_dict['name'] = "KiTS"
64
+ json_dict['description'] = "kidney and kidney tumor segmentation"
65
+ json_dict['tensorImageSize'] = "4D"
66
+ json_dict['reference'] = "KiTS data for nnunet"
67
+ json_dict['licence'] = ""
68
+ json_dict['release'] = "0.0"
69
+ json_dict['modality'] = {
70
+ "0": "CT",
71
+ }
72
+ json_dict['labels'] = {
73
+ "0": "background",
74
+ "1": "Kidney",
75
+ "2": "Tumor"
76
+ }
77
+
78
+ json_dict['numTraining'] = len(train_patient_names)
79
+ json_dict['numTest'] = len(test_patient_names)
80
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
81
+ train_patient_names]
82
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
83
+
84
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task065_KiTS_NicksLabels.py ADDED
@@ -0,0 +1,87 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import shutil
17
+
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ from nnunet.paths import nnUNet_raw_data
20
+
21
+ if __name__ == "__main__":
22
+ """
23
+ Nick asked me to rerun the training with other labels (the Kidney region is defined differently).
24
+
25
+ These labels operate in interpolated spacing. I don't like that but that's how it is
26
+ """
27
+
28
+ base = "/media/fabian/My Book/datasets/KiTS_NicksLabels/kits19/data"
29
+ labelsdir = "/media/fabian/My Book/datasets/KiTS_NicksLabels/filled_labels"
30
+
31
+ task_id = 65
32
+ task_name = "KiTS_NicksLabels"
33
+
34
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
35
+
36
+ out_base = join(nnUNet_raw_data, foldername)
37
+ imagestr = join(out_base, "imagesTr")
38
+ imagests = join(out_base, "imagesTs")
39
+ labelstr = join(out_base, "labelsTr")
40
+ maybe_mkdir_p(imagestr)
41
+ maybe_mkdir_p(imagests)
42
+ maybe_mkdir_p(labelstr)
43
+
44
+ train_patient_names = []
45
+ test_patient_names = []
46
+ all_cases = subfolders(base, join=False)
47
+
48
+ train_patients = all_cases[:210]
49
+ test_patients = all_cases[210:]
50
+
51
+ for p in train_patients:
52
+ curr = join(base, p)
53
+ label_file = join(labelsdir, p + ".nii.gz")
54
+ image_file = join(curr, "imaging.nii.gz")
55
+ shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
56
+ shutil.copy(label_file, join(labelstr, p + ".nii.gz"))
57
+ train_patient_names.append(p)
58
+
59
+ for p in test_patients:
60
+ curr = join(base, p)
61
+ image_file = join(curr, "imaging.nii.gz")
62
+ shutil.copy(image_file, join(imagests, p + "_0000.nii.gz"))
63
+ test_patient_names.append(p)
64
+
65
+ json_dict = {}
66
+ json_dict['name'] = "KiTS"
67
+ json_dict['description'] = "kidney and kidney tumor segmentation"
68
+ json_dict['tensorImageSize'] = "4D"
69
+ json_dict['reference'] = "KiTS data for nnunet"
70
+ json_dict['licence'] = ""
71
+ json_dict['release'] = "0.0"
72
+ json_dict['modality'] = {
73
+ "0": "CT",
74
+ }
75
+ json_dict['labels'] = {
76
+ "0": "background",
77
+ "1": "Kidney",
78
+ "2": "Tumor"
79
+ }
80
+
81
+ json_dict['numTraining'] = len(train_patient_names)
82
+ json_dict['numTest'] = len(test_patient_names)
83
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
84
+ train_patient_names]
85
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
86
+
87
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task069_CovidSeg.py ADDED
@@ -0,0 +1,68 @@
1
+ import shutil
2
+
3
+ from batchgenerators.utilities.file_and_folder_operations import *
4
+ import SimpleITK as sitk
5
+ from nnunet.paths import nnUNet_raw_data
6
+
7
+ if __name__ == '__main__':
8
+ #data is available at http://medicalsegmentation.com/covid19/
9
+ download_dir = '/home/fabian/Downloads'
10
+
11
+ task_id = 69
12
+ task_name = "CovidSeg"
13
+
14
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
15
+
16
+ out_base = join(nnUNet_raw_data, foldername)
17
+ imagestr = join(out_base, "imagesTr")
18
+ imagests = join(out_base, "imagesTs")
19
+ labelstr = join(out_base, "labelsTr")
20
+ maybe_mkdir_p(imagestr)
21
+ maybe_mkdir_p(imagests)
22
+ maybe_mkdir_p(labelstr)
23
+
24
+ train_patient_names = []
25
+ test_patient_names = []
26
+
27
+ # the niftis are 3d, but they are just stacks of 2d slices from different patients. So no 3d U-Net, please
28
+
29
+ # the training stack has 100 slices, so we split it into 5 equally sized parts (20 slices each) for cross-validation
30
+ training_data = sitk.GetArrayFromImage(sitk.ReadImage(join(download_dir, 'tr_im.nii.gz')))
31
+ training_labels = sitk.GetArrayFromImage(sitk.ReadImage(join(download_dir, 'tr_mask.nii.gz')))
32
+
33
+ for f in range(5):
34
+ this_name = 'part_%d' % f
35
+ data = training_data[f::5]
36
+ labels = training_labels[f::5]
37
+ sitk.WriteImage(sitk.GetImageFromArray(data), join(imagestr, this_name + '_0000.nii.gz'))
38
+ sitk.WriteImage(sitk.GetImageFromArray(labels), join(labelstr, this_name + '.nii.gz'))
39
+ train_patient_names.append(this_name)
40
+
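+     # note: the stride-5 slicing above (training_data[f::5]) deals the 100 training slices out round-robin, so
+     # each of the 5 parts gets 20 slices (part_0 = slices 0, 5, 10, ..., 95 and so on)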
41
+ shutil.copy(join(download_dir, 'val_im.nii.gz'), join(imagests, 'val_im.nii.gz'))
42
+
43
+ test_patient_names.append('val_im')
44
+
45
+ json_dict = {}
46
+ json_dict['name'] = task_name
47
+ json_dict['description'] = ""
48
+ json_dict['tensorImageSize'] = "4D"
49
+ json_dict['reference'] = ""
50
+ json_dict['licence'] = ""
51
+ json_dict['release'] = "0.0"
52
+ json_dict['modality'] = {
53
+ "0": "nonct",
54
+ }
55
+ json_dict['labels'] = {
56
+ "0": "background",
57
+ "1": "stuff1",
58
+ "2": "stuff2",
59
+ "3": "stuff3",
60
+ }
61
+
62
+ json_dict['numTraining'] = len(train_patient_names)
63
+ json_dict['numTest'] = len(test_patient_names)
64
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
65
+ train_patient_names]
66
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
67
+
68
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task075_Fluo_C3DH_A549_ManAndSim.py ADDED
@@ -0,0 +1,137 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from multiprocessing import Pool
16
+ import SimpleITK as sitk
17
+ import numpy as np
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ from nnunet.paths import nnUNet_raw_data
20
+ from nnunet.paths import preprocessing_output_dir
21
+ from skimage.io import imread
22
+
23
+
24
+ def load_tiff_convert_to_nifti(img_file, lab_file, img_out_base, anno_out, spacing):
25
+ img = imread(img_file)
26
+ img_itk = sitk.GetImageFromArray(img.astype(np.float32))
27
+ img_itk.SetSpacing(np.array(spacing)[::-1])
28
+ sitk.WriteImage(img_itk, join(img_out_base + "_0000.nii.gz"))
29
+
30
+ if lab_file is not None:
31
+ l = imread(lab_file)
32
+ l[l > 0] = 1
33
+ l_itk = sitk.GetImageFromArray(l.astype(np.uint8))
34
+ l_itk.SetSpacing(np.array(spacing)[::-1])
35
+ sitk.WriteImage(l_itk, anno_out)
36
+
37
+
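+ # note on the [::-1] above: the tif/numpy array is ordered (z, y, x) while SimpleITK expects spacing as
+ # (x, y, z), hence the spacing tuple is reversed before SetSpacing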
38
+ def prepare_task(base, task_id, task_name, spacing):
39
+ p = Pool(16)
40
+
41
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
42
+
43
+ out_base = join(nnUNet_raw_data, foldername)
44
+ imagestr = join(out_base, "imagesTr")
45
+ imagests = join(out_base, "imagesTs")
46
+ labelstr = join(out_base, "labelsTr")
47
+ maybe_mkdir_p(imagestr)
48
+ maybe_mkdir_p(imagests)
49
+ maybe_mkdir_p(labelstr)
50
+
51
+ train_patient_names = []
52
+ test_patient_names = []
53
+ res = []
54
+
55
+ for train_sequence in [i for i in subfolders(base + "_train", join=False) if not i.endswith("_GT")]:
56
+ train_cases = subfiles(join(base + '_train', train_sequence), suffix=".tif", join=False)
57
+ for t in train_cases:
58
+ casename = train_sequence + "_" + t[:-4]
59
+ img_file = join(base + '_train', train_sequence, t)
60
+ lab_file = join(base + '_train', train_sequence + "_GT", "SEG", "man_seg" + t[1:])
61
+ if not isfile(lab_file):
62
+ continue
63
+ img_out_base = join(imagestr, casename)
64
+ anno_out = join(labelstr, casename + ".nii.gz")
65
+ res.append(
66
+ p.starmap_async(load_tiff_convert_to_nifti, ((img_file, lab_file, img_out_base, anno_out, spacing),)))
67
+ train_patient_names.append(casename)
68
+
69
+ for test_sequence in [i for i in subfolders(base + "_test", join=False) if not i.endswith("_GT")]:
70
+ test_cases = subfiles(join(base + '_test', test_sequence), suffix=".tif", join=False)
71
+ for t in test_cases:
72
+ casename = test_sequence + "_" + t[:-4]
73
+ img_file = join(base + '_test', test_sequence, t)
74
+ lab_file = None
75
+ img_out_base = join(imagests, casename)
76
+ anno_out = None
77
+ res.append(
78
+ p.starmap_async(load_tiff_convert_to_nifti, ((img_file, lab_file, img_out_base, anno_out, spacing),)))
79
+ test_patient_names.append(casename)
80
+
81
+ _ = [i.get() for i in res]
82
+
83
+ json_dict = {}
84
+ json_dict['name'] = task_name
85
+ json_dict['description'] = ""
86
+ json_dict['tensorImageSize'] = "4D"
87
+ json_dict['reference'] = ""
88
+ json_dict['licence'] = ""
89
+ json_dict['release'] = "0.0"
90
+ json_dict['modality'] = {
91
+ "0": "BF",
92
+ }
93
+ json_dict['labels'] = {
94
+ "0": "background",
95
+ "1": "cell",
96
+ }
97
+
98
+ json_dict['numTraining'] = len(train_patient_names)
99
+ json_dict['numTest'] = len(test_patient_names)
100
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
101
+ train_patient_names]
102
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_patient_names]
103
+
104
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
105
+ p.close()
106
+ p.join()
107
+
108
+
109
+ if __name__ == "__main__":
110
+ base = "/media/fabian/My Book/datasets/CellTrackingChallenge/Fluo-C3DH-A549_ManAndSim"
111
+ task_id = 75
112
+ task_name = 'Fluo_C3DH_A549_ManAndSim'
113
+ spacing = (1, 0.126, 0.126)
114
+ prepare_task(base, task_id, task_name, spacing)
115
+
116
+ task_name = "Task075_Fluo_C3DH_A549_ManAndSim"
117
+ labelsTr = join(nnUNet_raw_data, task_name, "labelsTr")
118
+ cases = subfiles(labelsTr, suffix='.nii.gz', join=False)
119
+ splits = []
120
+ splits.append(
121
+ {'train': [i[:-7] for i in cases if i.startswith('01_') or i.startswith('02_SIM')],
122
+ 'val': [i[:-7] for i in cases if i.startswith('02_') and not i.startswith('02_SIM')]}
123
+ )
124
+ splits.append(
125
+ {'train': [i[:-7] for i in cases if i.startswith('02_') or i.startswith('01_SIM')],
126
+ 'val': [i[:-7] for i in cases if i.startswith('01_') and not i.startswith('01_SIM')]}
127
+ )
128
+ splits.append(
129
+ {'train': [i[:-7] for i in cases if i.startswith('01_') or i.startswith('02_') and not i.startswith('02_SIM')],
130
+ 'val': [i[:-7] for i in cases if i.startswith('02_SIM')]}
131
+ )
132
+ splits.append(
133
+ {'train': [i[:-7] for i in cases if i.startswith('02_') or i.startswith('01_') and not i.startswith('01_SIM')],
134
+ 'val': [i[:-7] for i in cases if i.startswith('01_SIM')]}
135
+ )
136
+ save_pickle(splits, join(preprocessing_output_dir, task_name, "splits_final.pkl"))
137
+
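+     # the four manual splits above implement a leave-one-sequence-out scheme: each of the four sequences
+     # (01, 02, 01_SIM, 02_SIM) serves as validation set exactly once while the other three are used for training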
nnunet/dataset_conversion/Task076_Fluo_N3DH_SIM.py ADDED
@@ -0,0 +1,312 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from multiprocessing.dummy import Pool  # thread-based Pool (the process-based import it shadowed was redundant)
18
+
19
+ import SimpleITK as sitk
20
+ import numpy as np
21
+ from batchgenerators.utilities.file_and_folder_operations import *
22
+ from skimage.io import imread
23
+ from skimage.io import imsave
24
+ from skimage.morphology import ball
25
+ from skimage.morphology import erosion
26
+ from skimage.transform import resize
27
+
28
+ from nnunet.paths import nnUNet_raw_data
29
+ from nnunet.paths import preprocessing_output_dir
30
+
31
+
32
+ def load_bmp_convert_to_nifti_borders(img_file, lab_file, img_out_base, anno_out, spacing, border_thickness=0.7):
33
+ img = imread(img_file)
34
+ img_itk = sitk.GetImageFromArray(img.astype(np.float32))
35
+ img_itk.SetSpacing(np.array(spacing)[::-1])
36
+ sitk.WriteImage(img_itk, join(img_out_base + "_0000.nii.gz"))
37
+
38
+ if lab_file is not None:
39
+ l = imread(lab_file)
40
+ borders = generate_border_as_suggested_by_twollmann(l, spacing, border_thickness)
41
+ l[l > 0] = 1
42
+ l[borders == 1] = 2
43
+ l_itk = sitk.GetImageFromArray(l.astype(np.uint8))
44
+ l_itk.SetSpacing(np.array(spacing)[::-1])
45
+ sitk.WriteImage(l_itk, anno_out)
46
+
47
+
48
+ def generate_ball(spacing, radius, dtype=int):
49
+ radius_in_voxels = np.round(radius / np.array(spacing)).astype(int)
50
+ n = 2 * radius_in_voxels + 1
51
+ ball_iso = ball(max(n) * 2, dtype=np.float64)
52
+ ball_resampled = resize(ball_iso, n, 1, 'constant', 0, clip=True, anti_aliasing=False, preserve_range=True)
53
+ ball_resampled[ball_resampled > 0.5] = 1
54
+ ball_resampled[ball_resampled <= 0.5] = 0
55
+ return ball_resampled.astype(dtype)
56
+
57
+
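+ # worked example (using the spacing set further below): generate_ball((0.2, 0.125, 0.125), 0.5) rounds the
+ # 0.5 mm radius to (2, 4, 4) voxels and returns a binary ellipsoid of shape (5, 9, 9), i.e. spherical in
+ # millimeters but anisotropic in voxels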
58
+ def generate_border_as_suggested_by_twollmann(label_img: np.ndarray, spacing, border_thickness: float = 2) -> np.ndarray:
59
+ border = np.zeros_like(label_img)
60
+ selem = generate_ball(spacing, border_thickness)
61
+ for l in np.unique(label_img):
62
+ if l == 0: continue
63
+ mask = (label_img == l).astype(int)
64
+ eroded = erosion(mask, selem)
65
+ border[(eroded == 0) & (mask != 0)] = 1
66
+ return border
67
+
68
+
69
+ def find_differences(labelstr1, labelstr2):
70
+ for n in subfiles(labelstr1, suffix='.nii.gz', join=False):
71
+ a = sitk.GetArrayFromImage(sitk.ReadImage(join(labelstr1, n)))
72
+ b = sitk.GetArrayFromImage(sitk.ReadImage(join(labelstr2, n)))
73
+ print(n, np.sum(a != b))
74
+
75
+
76
+ def prepare_task(base, task_id, task_name, spacing, border_thickness: float = 15, processes: int = 16):
77
+ p = Pool(processes)
78
+
79
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
80
+
81
+ out_base = join(nnUNet_raw_data, foldername)
82
+ imagestr = join(out_base, "imagesTr")
83
+ imagests = join(out_base, "imagesTs")
84
+ labelstr = join(out_base, "labelsTr")
85
+ maybe_mkdir_p(imagestr)
86
+ maybe_mkdir_p(imagests)
87
+ maybe_mkdir_p(labelstr)
88
+
89
+ train_patient_names = []
90
+ test_patient_names = []
91
+ res = []
92
+
93
+ for train_sequence in [i for i in subfolders(base + "_train", join=False) if not i.endswith("_GT")]:
94
+ train_cases = subfiles(join(base + '_train', train_sequence), suffix=".tif", join=False)
95
+ for t in train_cases:
96
+ casename = train_sequence + "_" + t[:-4]
97
+ img_file = join(base + '_train', train_sequence, t)
98
+ lab_file = join(base + '_train', train_sequence + "_GT", "SEG", "man_seg" + t[1:])
99
+ if not isfile(lab_file):
100
+ continue
101
+ img_out_base = join(imagestr, casename)
102
+ anno_out = join(labelstr, casename + ".nii.gz")
103
+ res.append(
104
+ p.starmap_async(load_bmp_convert_to_nifti_borders, ((img_file, lab_file, img_out_base, anno_out, spacing, border_thickness),)))
105
+ train_patient_names.append(casename)
106
+
107
+ for test_sequence in [i for i in subfolders(base + "_test", join=False) if not i.endswith("_GT")]:
108
+ test_cases = subfiles(join(base + '_test', test_sequence), suffix=".tif", join=False)
109
+ for t in test_cases:
110
+ casename = test_sequence + "_" + t[:-4]
111
+ img_file = join(base + '_test', test_sequence, t)
112
+ lab_file = None
113
+ img_out_base = join(imagests, casename)
114
+ anno_out = None
115
+ res.append(
116
+ p.starmap_async(load_bmp_convert_to_nifti_borders, ((img_file, lab_file, img_out_base, anno_out, spacing, border_thickness),)))
117
+ test_patient_names.append(casename)
118
+
119
+ _ = [i.get() for i in res]
120
+
121
+ json_dict = {}
122
+ json_dict['name'] = task_name
123
+ json_dict['description'] = ""
124
+ json_dict['tensorImageSize'] = "4D"
125
+ json_dict['reference'] = ""
126
+ json_dict['licence'] = ""
127
+ json_dict['release'] = "0.0"
128
+ json_dict['modality'] = {
129
+ "0": "BF",
130
+ }
131
+ json_dict['labels'] = {
132
+ "0": "background",
133
+ "1": "cell",
134
+ "2": "border",
135
+ }
136
+
137
+ json_dict['numTraining'] = len(train_patient_names)
138
+ json_dict['numTest'] = len(test_patient_names)
139
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
140
+ train_patient_names]
141
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_patient_names]
142
+
143
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
144
+ p.close()
145
+ p.join()
146
+
147
+
148
+ def plot_images(folder, output_folder):
149
+ maybe_mkdir_p(output_folder)
150
+ import matplotlib.pyplot as plt
151
+ for i in subfiles(folder, suffix='.nii.gz', join=False):
152
+ img = sitk.GetArrayFromImage(sitk.ReadImage(join(folder, i)))
153
+ center_slice = img[img.shape[0]//2]
154
+ plt.imsave(join(output_folder, i[:-7] + '.png'), center_slice)
155
+
156
+
157
+ def convert_to_tiff(nifti_image: str, output_name: str):
158
+ npy = sitk.GetArrayFromImage(sitk.ReadImage(nifti_image))
159
+ imsave(output_name, npy.astype(np.uint16), compress=6)
160
+
161
+
162
+ def convert_to_instance_seg(arr: np.ndarray, spacing: tuple = (0.2, 0.125, 0.125)):
163
+ from skimage.morphology import label, dilation
164
+ # 1 is core, 2 is border
165
+ objects = label((arr == 1).astype(int))
166
+ final = np.copy(objects)
167
+ remaining_border = arr == 2
168
+ current = np.copy(objects)
169
+ dilated_mm = np.array((0, 0, 0))
170
+ spacing = np.array(spacing)
171
+
172
+ while np.sum(remaining_border) > 0:
173
+ strel_size = [0, 0, 0]
174
+ maximum_dilation = max(dilated_mm)
175
+ for i in range(3):
176
+ if spacing[i] == min(spacing):
177
+ strel_size[i] = 1
178
+ continue
179
+ if dilated_mm[i] + spacing[i] / 2 < maximum_dilation:
180
+ strel_size[i] = 1
181
+ ball_here = ball(1)
182
+
183
+ if strel_size[0] == 0: ball_here = ball_here[1:2]
184
+ if strel_size[1] == 0: ball_here = ball_here[:, 1:2]
185
+ if strel_size[2] == 0: ball_here = ball_here[:, :, 1:2]
186
+
187
+ #print(1)
188
+ dilated = dilation(current, ball_here)
189
+ diff = (current == 0) & (dilated != current)
190
+ final[diff & remaining_border] = dilated[diff & remaining_border]
191
+ remaining_border[diff] = 0
192
+ current = dilated
193
+ dilated_mm = [dilated_mm[i] + spacing[i] if strel_size[i] == 1 else dilated_mm[i] for i in range(3)]
194
+ return final.astype(np.uint32)
195
+
196
+
197
+ def convert_to_instance_seg2(arr: np.ndarray, spacing: tuple = (0.2, 0.125, 0.125), small_center_threshold=30,
198
+ isolated_border_as_separate_instance_threshold: int = 15):
199
+ from skimage.morphology import label, dilation
200
+ # we first identify centers that are too small and set them to be border. This should remove false positive instances
201
+ objects = label((arr == 1).astype(int))
202
+ for o in np.unique(objects):
203
+ if o > 0 and np.sum(objects == o) <= small_center_threshold:
204
+ arr[objects == o] = 2
205
+
206
+ # 1 is core, 2 is border
207
+ objects = label((arr == 1).astype(int))
208
+ final = np.copy(objects)
209
+ remaining_border = arr == 2
210
+ current = np.copy(objects)
211
+ dilated_mm = np.array((0, 0, 0))
212
+ spacing = np.array(spacing)
213
+
214
+ while np.sum(remaining_border) > 0:
215
+ strel_size = [0, 0, 0]
216
+ maximum_dilation = max(dilated_mm)
217
+ for i in range(3):
218
+ if spacing[i] == min(spacing):
219
+ strel_size[i] = 1
220
+ continue
221
+ if dilated_mm[i] + spacing[i] / 2 < maximum_dilation:
222
+ strel_size[i] = 1
223
+ ball_here = ball(1)
224
+
225
+ if strel_size[0] == 0: ball_here = ball_here[1:2]
226
+ if strel_size[1] == 0: ball_here = ball_here[:, 1:2]
227
+ if strel_size[2] == 0: ball_here = ball_here[:, :, 1:2]
228
+
229
+ #print(1)
230
+ dilated = dilation(current, ball_here)
231
+ diff = (current == 0) & (dilated != current)
232
+ final[diff & remaining_border] = dilated[diff & remaining_border]
233
+ remaining_border[diff] = 0
234
+ current = dilated
235
+ dilated_mm = [dilated_mm[i] + spacing[i] if strel_size[i] == 1 else dilated_mm[i] for i in range(3)]
236
+
237
+ # what can happen is that a cell is so small that the network only predicted border and no core. This cell will be
238
+ # fused with the nearest other instance, which we don't want. Therefore we identify isolated border predictions and
239
+ # give them a separate instance id
240
+ # we identify isolated border predictions by checking each foreground object in arr and see whether this object
241
+ # also contains label 1
242
+ max_label = np.max(final)
243
+
244
+ foreground_objects = label((arr != 0).astype(int))
245
+ for i in np.unique(foreground_objects):
246
+ if i > 0 and (1 not in np.unique(arr[foreground_objects==i])):
247
+ size_of_object = np.sum(foreground_objects==i)
248
+ if size_of_object >= isolated_border_as_separate_instance_threshold:
249
+ final[foreground_objects == i] = max_label + 1
250
+ max_label += 1
251
+ #print('yeah boi')
252
+
253
+ return final.astype(np.uint32)
254
+
255
+
256
+ def load_instanceseg_save(in_file: str, out_file:str, better: bool):
257
+ itk_img = sitk.ReadImage(in_file)
258
+ if not better:
259
+ instanceseg = convert_to_instance_seg(sitk.GetArrayFromImage(itk_img))
260
+ else:
261
+ instanceseg = convert_to_instance_seg2(sitk.GetArrayFromImage(itk_img))
262
+ itk_out = sitk.GetImageFromArray(instanceseg)
263
+ itk_out.CopyInformation(itk_img)
264
+ sitk.WriteImage(itk_out, out_file)
265
+
266
+
267
+ def convert_all_to_instance(input_folder: str, output_folder: str, processes: int = 24, better: bool = False):
268
+ maybe_mkdir_p(output_folder)
269
+ p = Pool(processes)
270
+ files = subfiles(input_folder, suffix='.nii.gz', join=False)
271
+ output_files = [join(output_folder, i) for i in files]
272
+ input_files = [join(input_folder, i) for i in files]
273
+ better = [better] * len(files)
274
+ r = p.starmap_async(load_instanceseg_save, zip(input_files, output_files, better))
275
+ _ = r.get()
276
+ p.close()
277
+ p.join()
278
+
279
+
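+ # usage sketch (hypothetical folders): turn exported core/border label maps into instance segmentations:
+ #
+ #   convert_all_to_instance('/path/to/predicted_niftis', '/path/to/instance_niftis', better=True)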
280
+ if __name__ == "__main__":
281
+ base = "/home/fabian/data/Fluo-N3DH-SIM"
282
+ task_id = 76
283
+ task_name = 'Fluo_N3DH_SIM'
284
+ spacing = (0.2, 0.125, 0.125)
285
+ border_thickness = 0.5
286
+
287
+ prepare_task(base, task_id, task_name, spacing, border_thickness, 12)
288
+
289
+ # we need custom splits
290
+ task_name = "Task076_Fluo_N3DH_SIM"
291
+ labelsTr = join(nnUNet_raw_data, task_name, "labelsTr")
292
+ cases = subfiles(labelsTr, suffix='.nii.gz', join=False)
293
+ splits = []
294
+ splits.append(
295
+ {'train': [i[:-7] for i in cases if i.startswith('01_')],
296
+ 'val': [i[:-7] for i in cases if i.startswith('02_')]}
297
+ )
298
+ splits.append(
299
+ {'train': [i[:-7] for i in cases if i.startswith('02_')],
300
+ 'val': [i[:-7] for i in cases if i.startswith('01_')]}
301
+ )
302
+
303
+ maybe_mkdir_p(join(preprocessing_output_dir, task_name))
304
+
305
+ save_pickle(splits, join(preprocessing_output_dir, task_name, "splits_final.pkl"))
306
+
307
+ # test set was converted to instance seg with convert_all_to_instance with better=True
308
+
309
+ # convert to tiff with convert_to_tiff
310
+
311
+
312
+
nnunet/dataset_conversion/Task082_BraTS_2020.py ADDED
@@ -0,0 +1,751 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import shutil
+ from collections import OrderedDict
+ from copy import deepcopy
+ from multiprocessing.pool import Pool
+ from typing import Union
+
+ import SimpleITK as sitk
+ import numpy as np
+ import scipy.stats as ss
+ from batchgenerators.utilities.file_and_folder_operations import *
+ from medpy.metric import dc, hd95
+ from nnunet.dataset_conversion.Task032_BraTS_2018 import convert_labels_back_to_BraTS_2018_2019_convention
+ from nnunet.dataset_conversion.Task043_BraTS_2019 import copy_BraTS_segmentation_and_convert_labels
+ from nnunet.evaluation.region_based_evaluation import get_brats_regions, evaluate_regions
+ from nnunet.paths import nnUNet_raw_data
+ from nnunet.postprocessing.consolidate_postprocessing import collect_cv_niftis
+
+
+ def apply_brats_threshold(fname, out_dir, threshold, replace_with):
+     img_itk = sitk.ReadImage(fname)
+     img_npy = sitk.GetArrayFromImage(img_itk)
+     s = np.sum(img_npy == 3)
+     if s < threshold:
+         # print(s, fname)
+         img_npy[img_npy == 3] = replace_with
+     img_itk_postprocessed = sitk.GetImageFromArray(img_npy)
+     img_itk_postprocessed.CopyInformation(img_itk)
+     sitk.WriteImage(img_itk_postprocessed, join(out_dir, fname.split("/")[-1]))
+
+
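+ # toy illustration of the rule above (values are made up): a prediction with fewer
+ # than `threshold` enhancing-tumor voxels (label 3) gets that label relabeled:
+ # seg = np.array([0, 3, 3, 2]); threshold = 5; replace_with = 2
+ # if np.sum(seg == 3) < threshold:
+ #     seg[seg == 3] = replace_with   # -> [0, 2, 2, 2]
+
+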
+ def load_niftis_threshold_compute_dice(gt_file, pred_file, thresholds: Union[list, tuple]):
+     gt = sitk.GetArrayFromImage(sitk.ReadImage(gt_file))
+     pred = sitk.GetArrayFromImage(sitk.ReadImage(pred_file))
+     mask_pred = pred == 3
+     mask_gt = gt == 3
+     num_pred = np.sum(mask_pred)
+
+     num_gt = np.sum(mask_gt)
+     dice = dc(mask_pred, mask_gt)
+
+     res_dice = {}
+     res_was_smaller = {}
+
+     for t in thresholds:
+         was_smaller = False
+
+         if num_pred < t:
+             was_smaller = True
+             if num_gt == 0:
+                 dice_here = 1.
+             else:
+                 dice_here = 0.
+         else:
+             dice_here = deepcopy(dice)
+
+         res_dice[t] = dice_here
+         res_was_smaller[t] = was_smaller
+
+     return res_was_smaller, res_dice
+
+
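+ # shape of the return value for thresholds (0, 100), with illustrative numbers:
+ # res_was_smaller = {0: False, 100: True}; res_dice = {0: 0.71, 100: 0.0}
+
+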
+ def apply_threshold_to_folder(folder_in, folder_out, threshold, replace_with, processes=24):
+     maybe_mkdir_p(folder_out)
+     niftis = subfiles(folder_in, suffix='.nii.gz', join=True)
+
+     p = Pool(processes)
+     p.starmap(apply_brats_threshold, zip(niftis, [folder_out] * len(niftis), [threshold] * len(niftis), [replace_with] * len(niftis)))
+
+     p.close()
+     p.join()
+
+
+ def determine_brats_postprocessing(folder_with_preds, folder_with_gt, postprocessed_output_dir, processes=8,
+                                    thresholds=(0, 10, 50, 100, 200, 500, 750, 1000, 1500, 2500, 10000), replace_with=2):
+     # find pairs
+     nifti_gt = subfiles(folder_with_gt, suffix=".nii.gz", sort=True)
+
+     p = Pool(processes)
+
+     nifti_pred = subfiles(folder_with_preds, suffix='.nii.gz', sort=True)
+
+     results = p.starmap_async(load_niftis_threshold_compute_dice, zip(nifti_gt, nifti_pred, [thresholds] * len(nifti_pred)))
+     results = results.get()
+
+     all_dc_per_threshold = {}
+     for t in thresholds:
+         all_dc_per_threshold[t] = np.array([i[1][t] for i in results])
+         print(t, np.mean(all_dc_per_threshold[t]))
+
+     means = [np.mean(all_dc_per_threshold[t]) for t in thresholds]
+     best_threshold = thresholds[np.argmax(means)]
+     print('best', best_threshold, means[np.argmax(means)])
+
+     maybe_mkdir_p(postprocessed_output_dir)
+
+     p.starmap(apply_brats_threshold, zip(nifti_pred, [postprocessed_output_dir] * len(nifti_pred), [best_threshold] * len(nifti_pred), [replace_with] * len(nifti_pred)))
+
+     p.close()
+     p.join()
+
+     save_pickle((thresholds, means, best_threshold, all_dc_per_threshold), join(postprocessed_output_dir, "threshold.pkl"))
+
+
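+ # sketch of a typical call (paths are illustrative): picks the threshold with the best
+ # mean Dice for label 3, writes postprocessed niftis and threshold.pkl
+ # determine_brats_postprocessing('/path/to/cv_predictions', '/path/to/gt_niftis',
+ #                                '/path/to/cv_predictions_pp', processes=8)
+
+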
+ def collect_and_prepare(base_dir, num_processes=12, clean=False):
+     """
+     collect all cv_niftis, compute brats metrics, compute enh tumor thresholds and summarize in csv
+     :param base_dir:
+     :param num_processes:
+     :param clean: if True, recompute summaries/thresholds even if they already exist
+     :return:
+     """
+     out = join(base_dir, 'cv_results')
+     out_pp = join(base_dir, 'cv_results_pp')
+     experiments = subfolders(base_dir, join=False, prefix='nnUNetTrainer')
+     regions = get_brats_regions()
+     gt_dir = join(base_dir, 'gt_niftis')
+     replace_with = 2
+
+     failed = []
+     successful = []
+     for e in experiments:
+         print(e)
+         try:
+             o = join(out, e)
+             o_p = join(out_pp, e)
+             maybe_mkdir_p(o)
+             maybe_mkdir_p(o_p)
+             collect_cv_niftis(join(base_dir, e), o)
+             if clean or not isfile(join(o, 'summary.csv')):
+                 evaluate_regions(o, gt_dir, regions, num_processes)
+             if clean or not isfile(join(o_p, 'threshold.pkl')):
+                 determine_brats_postprocessing(o, gt_dir, o_p, num_processes, thresholds=list(np.arange(0, 760, 10)), replace_with=replace_with)
+             if clean or not isfile(join(o_p, 'summary.csv')):
+                 evaluate_regions(o_p, gt_dir, regions, num_processes)
+             successful.append(e)
+         except Exception as ex:
+             print("\nERROR\n", e, ex, "\n")
+             failed.append(e)
+
+     # we are interested in the mean (nan is 1) column
+     with open(join(base_dir, 'cv_summary.csv'), 'w') as f:
+         f.write('name,whole,core,enh,mean\n')
+         for e in successful:
+             expected_nopp = join(out, e, 'summary.csv')
+             expected_pp = join(out_pp, e, 'summary.csv')
+             if isfile(expected_nopp):
+                 res = np.loadtxt(expected_nopp, dtype=str, skiprows=0, delimiter=',')[-2]
+                 as_numeric = [float(i) for i in res[1:]]
+                 f.write(e + '_noPP,')
+                 f.write("%0.4f," % as_numeric[0])
+                 f.write("%0.4f," % as_numeric[1])
+                 f.write("%0.4f," % as_numeric[2])
+                 f.write("%0.4f\n" % np.mean(as_numeric))
+             if isfile(expected_pp):
+                 res = np.loadtxt(expected_pp, dtype=str, skiprows=0, delimiter=',')[-2]
+                 as_numeric = [float(i) for i in res[1:]]
+                 f.write(e + '_PP,')
+                 f.write("%0.4f," % as_numeric[0])
+                 f.write("%0.4f," % as_numeric[1])
+                 f.write("%0.4f," % as_numeric[2])
+                 f.write("%0.4f\n" % np.mean(as_numeric))
+
+     # this just crawls the folders and evaluates what it finds
+     with open(join(base_dir, 'cv_summary2.csv'), 'w') as f:
+         for folder in ['cv_results', 'cv_results_pp']:
+             for ex in subdirs(join(base_dir, folder), join=False):
+                 print(folder, ex)
+                 expected = join(base_dir, folder, ex, 'summary.csv')
+                 if clean or not isfile(expected):
+                     evaluate_regions(join(base_dir, folder, ex), gt_dir, regions, num_processes)
+                 if isfile(expected):
+                     res = np.loadtxt(expected, dtype=str, skiprows=0, delimiter=',')[-2]
+                     as_numeric = [float(i) for i in res[1:]]
+                     f.write('%s__%s,' % (folder, ex))
+                     f.write("%0.4f," % as_numeric[0])
+                     f.write("%0.4f," % as_numeric[1])
+                     f.write("%0.4f," % as_numeric[2])
+                     f.write("%0.4f\n" % np.mean(as_numeric))
+
+         f.write('name,whole,core,enh,mean\n')
+         for e in successful:
+             expected_nopp = join(out, e, 'summary.csv')
+             expected_pp = join(out_pp, e, 'summary.csv')
+             if isfile(expected_nopp):
+                 res = np.loadtxt(expected_nopp, dtype=str, skiprows=0, delimiter=',')[-2]
+                 as_numeric = [float(i) for i in res[1:]]
+                 f.write(e + '_noPP,')
+                 f.write("%0.4f," % as_numeric[0])
+                 f.write("%0.4f," % as_numeric[1])
+                 f.write("%0.4f," % as_numeric[2])
+                 f.write("%0.4f\n" % np.mean(as_numeric))
+             if isfile(expected_pp):
+                 res = np.loadtxt(expected_pp, dtype=str, skiprows=0, delimiter=',')[-2]
+                 as_numeric = [float(i) for i in res[1:]]
+                 f.write(e + '_PP,')
+                 f.write("%0.4f," % as_numeric[0])
+                 f.write("%0.4f," % as_numeric[1])
+                 f.write("%0.4f," % as_numeric[2])
+                 f.write("%0.4f\n" % np.mean(as_numeric))
+
+     # apply threshold to val set
+     expected_num_cases = 125
+     missing_valset = []
+     has_val_pred = []
+     for e in successful:
+         if isdir(join(base_dir, 'predVal', e)):
+             currdir = join(base_dir, 'predVal', e)
+             files = subfiles(currdir, suffix='.nii.gz', join=False)
+             if len(files) != expected_num_cases:
+                 print(e, 'prediction not done, found %d files, expected %d' % (len(files), expected_num_cases))
+                 continue
+             output_folder = join(base_dir, 'predVal_PP', e)
+             maybe_mkdir_p(output_folder)
+             threshold = load_pickle(join(out_pp, e, 'threshold.pkl'))[2]
+             if threshold > 1000: threshold = 750  # don't make it too big!
+             apply_threshold_to_folder(currdir, output_folder, threshold, replace_with, num_processes)
+             has_val_pred.append(e)
+         else:
+             print(e, 'has no valset predictions')
+             missing_valset.append(e)
+
+     # 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold' needs special treatment
+     e = 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5'
+     currdir = join(base_dir, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
+     output_folder = join(base_dir, 'predVal_PP', 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
+     maybe_mkdir_p(output_folder)
+     threshold = load_pickle(join(out_pp, e, 'threshold.pkl'))[2]
+     if threshold > 1000: threshold = 750  # don't make it too big!
+     apply_threshold_to_folder(currdir, output_folder, threshold, replace_with, num_processes)
+
+     # 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold' needs special treatment
+     e = 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5'
+     currdir = join(base_dir, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
+     output_folder = join(base_dir, 'predVal_PP', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
+     maybe_mkdir_p(output_folder)
+     threshold = load_pickle(join(out_pp, e, 'threshold.pkl'))[2]
+     if threshold > 1000: threshold = 750  # don't make it too big!
+     apply_threshold_to_folder(currdir, output_folder, threshold, replace_with, num_processes)
+
+     # convert val set to brats labels for submission
+     output_converted = join(base_dir, 'converted_valSet')
+
+     for source in ['predVal', 'predVal_PP']:
+         for e in has_val_pred + ['nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold']:
+             expected_source_folder = join(base_dir, source, e)
+             if not isdir(expected_source_folder):
+                 print(e, 'has no', source)
+                 raise RuntimeError()
+             files = subfiles(expected_source_folder, suffix='.nii.gz', join=False)
+             if len(files) != expected_num_cases:
+                 print(e, 'prediction not done, found %d files, expected %d' % (len(files), expected_num_cases))
+                 continue
+             target_folder = join(output_converted, source, e)
+             maybe_mkdir_p(target_folder)
+             convert_labels_back_to_BraTS_2018_2019_convention(expected_source_folder, target_folder)
+
+     summarize_validation_set_predictions(output_converted)
+
+
+ def summarize_validation_set_predictions(base):
+     with open(join(base, 'summary.csv'), 'w') as f:
+         f.write('name,whole,core,enh,mean,whole,core,enh,mean\n')
+         for subf in subfolders(base, join=False):
+             for e in subfolders(join(base, subf), join=False):
+                 expected = join(base, subf, e, 'Stats_Validation_final.csv')
+                 if not isfile(expected):
+                     print(subf, e, 'has missing csv')
+                     continue
+                 a = np.loadtxt(expected, delimiter=',', dtype=str)
+                 assert a.shape[0] == 131, 'did not evaluate all 125 cases!'
+                 selected_row = a[-5]
+                 values = [float(i) for i in selected_row[1:4]]
+                 f.write(e + "_" + subf + ',')
+                 f.write("%0.4f," % values[1])
+                 f.write("%0.4f," % values[2])
+                 f.write("%0.4f," % values[0])
+                 f.write("%0.4f," % np.mean(values))
+                 values = [float(i) for i in selected_row[-3:]]
+                 f.write("%0.4f," % values[1])
+                 f.write("%0.4f," % values[2])
+                 f.write("%0.4f," % values[0])
+                 f.write("%0.4f\n" % np.mean(values))
+
+
+ def compute_BraTS_dice(ref, pred):
+     """
+     ref and pred are binary integer numpy.ndarrays
+     :param ref:
+     :param pred:
+     :return:
+     """
+     num_ref = np.sum(ref)
+     num_pred = np.sum(pred)
+
+     if num_ref == 0:
+         if num_pred == 0:
+             return 1
+         else:
+             return 0
+     else:
+         return dc(pred, ref)
+
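+ # worked example (made-up arrays): ref = [1, 1, 0, 0], pred = [1, 0, 0, 0]
+ # overlap = 1, so Dice = 2 * 1 / (2 + 1) = 0.6667; all-empty pairs score 1, and an
+ # empty reference with a non-empty prediction scores 0
+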
+
+ def convert_all_to_BraTS(input_folder, output_folder, expected_num_cases=125):
+     for s in subdirs(input_folder, join=False):
+         nii = subfiles(join(input_folder, s), suffix='.nii.gz', join=False)
+         if len(nii) != expected_num_cases:
+             print(s)
+         else:
+             target_dir = join(output_folder, s)
+             convert_labels_back_to_BraTS_2018_2019_convention(join(input_folder, s), target_dir, num_processes=6)
+
+
+ def compute_BraTS_HD95(ref, pred):
+     """
+     ref and pred are binary integer numpy.ndarrays
+     spacing is assumed to be (1, 1, 1)
+     :param ref:
+     :param pred:
+     :return:
+     """
+     num_ref = np.sum(ref)
+     num_pred = np.sum(pred)
+
+     if num_ref == 0:
+         if num_pred == 0:
+             return 0
+         else:
+             return 373.12866
+     elif num_pred == 0 and num_ref != 0:
+         return 373.12866
+     else:
+         return hd95(pred, ref, (1, 1, 1))
+
+
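+ # 373.12866 is the worst-case penalty for a missing/spurious region; it corresponds to
+ # the diagonal of the standard 240x240x155 BraTS grid: sqrt(240**2 + 240**2 + 155**2)
+
+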
+ def evaluate_BraTS_case(arr: np.ndarray, arr_gt: np.ndarray):
+     """
+     attempting to reimplement the brats evaluation scheme
+     assumes edema=1, non_enh=2, enh=3
+     :param arr:
+     :param arr_gt:
+     :return:
+     """
+     # whole tumor
+     mask_gt = (arr_gt != 0).astype(int)
+     mask_pred = (arr != 0).astype(int)
+     dc_whole = compute_BraTS_dice(mask_gt, mask_pred)
+     hd95_whole = compute_BraTS_HD95(mask_gt, mask_pred)
+     del mask_gt, mask_pred
+
+     # tumor core
+     mask_gt = (arr_gt > 1).astype(int)
+     mask_pred = (arr > 1).astype(int)
+     dc_core = compute_BraTS_dice(mask_gt, mask_pred)
+     hd95_core = compute_BraTS_HD95(mask_gt, mask_pred)
+     del mask_gt, mask_pred
+
+     # enhancing
+     mask_gt = (arr_gt == 3).astype(int)
+     mask_pred = (arr == 3).astype(int)
+     dc_enh = compute_BraTS_dice(mask_gt, mask_pred)
+     hd95_enh = compute_BraTS_HD95(mask_gt, mask_pred)
+     del mask_gt, mask_pred
+
+     return dc_whole, dc_core, dc_enh, hd95_whole, hd95_core, hd95_enh
+
+
+ def load_evaluate(filename_gt: str, filename_pred: str):
+     arr_pred = sitk.GetArrayFromImage(sitk.ReadImage(filename_pred))
+     arr_gt = sitk.GetArrayFromImage(sitk.ReadImage(filename_gt))
+     return evaluate_BraTS_case(arr_pred, arr_gt)
+
+
+ def evaluate_BraTS_folder(folder_pred, folder_gt, num_processes: int = 24, strict=False):
+     nii_pred = subfiles(folder_pred, suffix='.nii.gz', join=False)
+     if len(nii_pred) == 0:
+         return
+     nii_gt = subfiles(folder_gt, suffix='.nii.gz', join=False)
+     assert all([i in nii_gt for i in nii_pred]), 'not all predicted niftis have a reference file!'
+     if strict:
+         assert all([i in nii_pred for i in nii_gt]), 'not all gt niftis have a predicted file!'
+     p = Pool(num_processes)
+     nii_pred_fullpath = [join(folder_pred, i) for i in nii_pred]
+     nii_gt_fullpath = [join(folder_gt, i) for i in nii_pred]
+     results = p.starmap(load_evaluate, zip(nii_gt_fullpath, nii_pred_fullpath))
+     # now write to output file
+     with open(join(folder_pred, 'results.csv'), 'w') as f:
+         f.write("name,dc_whole,dc_core,dc_enh,hd95_whole,hd95_core,hd95_enh\n")
+         for fname, r in zip(nii_pred, results):
+             f.write(fname)
+             f.write(",%0.4f,%0.4f,%0.4f,%3.3f,%3.3f,%3.3f\n" % r)
+
+
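+ # sketch (paths illustrative): writes results.csv into the prediction folder
+ # evaluate_BraTS_folder('/path/to/predictions', '/path/to/gt_niftis', num_processes=8)
+
+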
+ def load_csv_for_ranking(csv_file: str):
+     res = np.loadtxt(csv_file, dtype='str', delimiter=',')
+     scores = res[1:, [1, 2, 3, -3, -2, -1]].astype(float)
+     scores[:, -3:] *= -1
+     scores[:, -3:] += 373.129
+     assert np.all(scores <= 373.129)
+     assert np.all(scores >= 0)
+     return scores
+
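+ # note: the three HD95 columns are flipped to (373.129 - hd95) so that, like Dice,
+ # higher is better for all six metrics and one descending ranking pass suffices
+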
+
+ def rank_algorithms(data: np.ndarray):
+     """
+     data is (metrics x experiments x cases)
+     :param data:
+     :return:
+     """
+     num_metrics, num_experiments, num_cases = data.shape
+     ranks = np.zeros((num_metrics, num_experiments))
+     for m in range(num_metrics):
+         r = np.apply_along_axis(ss.rankdata, 0, -data[m], 'min')
+         ranks[m] = r.mean(1)
+     average_rank = np.mean(ranks, 0)
+     final_ranks = ss.rankdata(average_rank, 'min')
+     return final_ranks, average_rank, ranks
+
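+ # toy usage (random data, shapes only): 6 metrics, 3 experiments, 10 cases
+ # data = np.random.rand(6, 3, 10)
+ # final_ranks, average_rank, ranks = rank_algorithms(data)
+ # final_ranks has shape (3,); rank 1 is best
+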
+
+ def score_and_postprocess_model_based_on_rank_then_aggregate():
+     """
+     Similarly to BraTS 2017 - BraTS 2019, each participant will be ranked for each of the X test cases. Each case
+     includes 3 regions of evaluation, and the metrics used to produce the rankings will be the Dice Similarity
+     Coefficient and the 95% Hausdorff distance. Thus, for X number of cases included in the BraTS 2020, each
+     participant ends up having X*3*2 rankings. The final ranking score is the average of all these rankings normalized
+     by the number of teams.
+     https://zenodo.org/record/3718904
+
+     -> let's optimize for this.
+
+     Important: the outcome very much depends on the competing models. We need some references. We only got our own,
+     so let's hope this still works
+     :return:
+     """
+     base = "/media/fabian/Results/nnUNet/3d_fullres/Task082_BraTS2020"
+     replace_with = 2
+     num_processes = 24
+     expected_num_cases_val = 125
+
+     # use a separate output folder from the previous experiments to ensure we are not messing things up
+     output_base_here = join(base, 'use_brats_ranking')
+     maybe_mkdir_p(output_base_here)
+
+     # collect cv niftis and compute metrics with evaluate_BraTS_folder to ensure we work with the same metrics as brats
+     out = join(output_base_here, 'cv_results')
+     experiments = subfolders(base, join=False, prefix='nnUNetTrainer')
+     gt_dir = join(base, 'gt_niftis')
+
+     experiments_with_full_cv = []
+     for e in experiments:
+         print(e)
+         o = join(out, e)
+         maybe_mkdir_p(o)
+         try:
+             collect_cv_niftis(join(base, e), o)
+             if not isfile(join(o, 'results.csv')):
+                 evaluate_BraTS_folder(o, gt_dir, num_processes, strict=True)
+             experiments_with_full_cv.append(e)
+         except Exception as ex:
+             print("\nERROR\n", e, ex, "\n")
+             if isfile(join(o, 'results.csv')):
+                 os.remove(join(o, 'results.csv'))
+
+     # rank the non-postprocessed models
+     tmp = np.loadtxt(join(out, experiments_with_full_cv[0], 'results.csv'), dtype='str', delimiter=',')
+     num_cases = len(tmp) - 1
+     data_for_ranking = np.zeros((6, len(experiments_with_full_cv), num_cases))
+     for i, e in enumerate(experiments_with_full_cv):
+         scores = load_csv_for_ranking(join(out, e, 'results.csv'))
+         for metric in range(6):
+             data_for_ranking[metric, i] = scores[:, metric]
+
+     final_ranks, average_rank, ranks = rank_algorithms(data_for_ranking)
+
+     for t in np.argsort(final_ranks):
+         print(final_ranks[t], average_rank[t], experiments_with_full_cv[t])
+
+     # for each model, create output directories with different thresholds. evaluate ALL OF THEM (might take a while lol)
+     thresholds = np.arange(25, 751, 25)
+     output_pp_tmp = join(output_base_here, 'cv_determine_pp_thresholds')
+     for e in experiments_with_full_cv:
+         input_folder = join(out, e)
+         for t in thresholds:
+             output_directory = join(output_pp_tmp, e, str(t))
+             maybe_mkdir_p(output_directory)
+             if not isfile(join(output_directory, 'results.csv')):
+                 apply_threshold_to_folder(input_folder, output_directory, t, replace_with, processes=16)
+                 evaluate_BraTS_folder(output_directory, gt_dir, num_processes)
+
+     # load ALL the results!
+     results = []
+     experiment_names = []
+     for e in experiments_with_full_cv:
+         for t in thresholds:
+             output_directory = join(output_pp_tmp, e, str(t))
+             expected_file = join(output_directory, 'results.csv')
+             if not isfile(expected_file):
+                 print(e, 'does not have a results file for threshold', t)
+                 continue
+             results.append(load_csv_for_ranking(expected_file))
+             experiment_names.append("%s___%d" % (e, t))
+     all_results = np.concatenate([i[None] for i in results], 0).transpose((2, 0, 1))
+
+     # concatenate with non postprocessed models
+     all_results = np.concatenate((data_for_ranking, all_results), 1)
+     experiment_names += experiments_with_full_cv
+
+     final_ranks, average_rank, ranks = rank_algorithms(all_results)
+
+     for t in np.argsort(final_ranks):
+         print(final_ranks[t], average_rank[t], experiment_names[t])
+
+     # for each model, print the non postprocessed model as well as the best postprocessed model. If there are
+     # validation set predictions, apply the best threshold to the validation set
+     pred_val_base = join(base, 'predVal_PP_rank')
+     has_val_pred = []
+     for e in experiments_with_full_cv:
+         rank_nonpp = final_ranks[experiment_names.index(e)]
+         avg_rank_nonpp = average_rank[experiment_names.index(e)]
+         print(e, avg_rank_nonpp, rank_nonpp)
+         predicted_val = join(base, 'predVal', e)
+
+         pp_models = [j for j, i in enumerate(experiment_names) if i.split("___")[0] == e and i != e]
+         if len(pp_models) > 0:
+             ranks = [final_ranks[i] for i in pp_models]
+             best_idx = np.argmin(ranks)
+             best = experiment_names[pp_models[best_idx]]
+             best_avg_rank = average_rank[pp_models[best_idx]]
+             print(best, best_avg_rank, min(ranks))
+             print('')
+             # apply threshold to validation set
+             best_threshold = int(best.split('___')[-1])
+             if not isdir(predicted_val):
+                 print(e, 'has no valset predictions')
+             else:
+                 files = subfiles(predicted_val, suffix='.nii.gz')
+                 if len(files) != expected_num_cases_val:
+                     print(e, 'has missing val cases. found: %d expected: %d' % (len(files), expected_num_cases_val))
+                 else:
+                     apply_threshold_to_folder(predicted_val, join(pred_val_base, e), best_threshold, replace_with, num_processes)
+                     has_val_pred.append(e)
+         else:
+             print(e, 'not found in ranking')
+
+     # apply nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5 to nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold
+     e = 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5'
+     pp_models = [j for j, i in enumerate(experiment_names) if i.split("___")[0] == e and i != e]
+     ranks = [final_ranks[i] for i in pp_models]
+     best_idx = np.argmin(ranks)
+     best = experiment_names[pp_models[best_idx]]
+     best_avg_rank = average_rank[pp_models[best_idx]]
+     best_threshold = int(best.split('___')[-1])
+     predicted_val = join(base, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
+     apply_threshold_to_folder(predicted_val, join(pred_val_base, 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold'), best_threshold, replace_with, num_processes)
+     has_val_pred.append('nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
+
+     # apply nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5 to nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold
+     e = 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5'
+     pp_models = [j for j, i in enumerate(experiment_names) if i.split("___")[0] == e and i != e]
+     ranks = [final_ranks[i] for i in pp_models]
+     best_idx = np.argmin(ranks)
+     best = experiment_names[pp_models[best_idx]]
+     best_avg_rank = average_rank[pp_models[best_idx]]
+     best_threshold = int(best.split('___')[-1])
+     predicted_val = join(base, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
+     apply_threshold_to_folder(predicted_val, join(pred_val_base, 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold'), best_threshold, replace_with, num_processes)
+     has_val_pred.append('nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
+
+     # convert valsets
+     output_converted = join(base, 'converted_valSet')
+     for e in has_val_pred:
+         expected_source_folder = join(base, 'predVal_PP_rank', e)
+         if not isdir(expected_source_folder):
+             print(e, 'has no predVal_PP_rank')
+             raise RuntimeError()
+         files = subfiles(expected_source_folder, suffix='.nii.gz', join=False)
+         if len(files) != expected_num_cases_val:
+             print(e, 'prediction not done, found %d files, expected %d' % (len(files), expected_num_cases_val))
+             continue
+         target_folder = join(output_converted, 'predVal_PP_rank', e)
+         maybe_mkdir_p(target_folder)
+         convert_labels_back_to_BraTS_2018_2019_convention(expected_source_folder, target_folder)
+
+     # now load all the csvs for the validation set (obtained from evaluation platform) and rank our models on the
+     # validation set
+     flds = subdirs(output_converted, join=False)
+     results_valset = []
+     names_valset = []
+     for f in flds:
+         curr = join(output_converted, f)
+         experiments = subdirs(curr, join=False)
+         for e in experiments:
+             currr = join(curr, e)
+             expected_file = join(currr, 'Stats_Validation_final.csv')
+             if not isfile(expected_file):
+                 print(f, e, "has not been evaluated yet!")
+             else:
+                 res = load_csv_for_ranking(expected_file)[:-5]
+                 assert res.shape[0] == expected_num_cases_val
+                 results_valset.append(res[None])
+                 names_valset.append("%s___%s" % (f, e))
+     results_valset = np.concatenate(results_valset, 0)  # experiments x cases x metrics
+     # convert to metrics x experiments x cases
+     results_valset = results_valset.transpose((2, 0, 1))
+     final_ranks, average_rank, ranks = rank_algorithms(results_valset)
+     for t in np.argsort(final_ranks):
+         print(final_ranks[t], average_rank[t], names_valset[t])
+
+
+ if __name__ == "__main__":
+     """
+     THIS CODE IS A MESS. IT IS PROVIDED AS IS WITH NO GUARANTEES. YOU HAVE TO DIG THROUGH IT YOURSELF. GOOD LUCK ;-)
+
+     REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
+     """
+
+     task_name = "Task082_BraTS2020"
+     downloaded_data_dir = "/home/fabian/Downloads/MICCAI_BraTS2020_TrainingData"
+     downloaded_data_dir_val = "/home/fabian/Downloads/MICCAI_BraTS2020_ValidationData"
+
+     target_base = join(nnUNet_raw_data, task_name)
+     target_imagesTr = join(target_base, "imagesTr")
+     target_imagesVal = join(target_base, "imagesVal")
+     target_imagesTs = join(target_base, "imagesTs")
+     target_labelsTr = join(target_base, "labelsTr")
+
+     maybe_mkdir_p(target_imagesTr)
+     maybe_mkdir_p(target_imagesVal)
+     maybe_mkdir_p(target_imagesTs)
+     maybe_mkdir_p(target_labelsTr)
+
+     patient_names = []
+     cur = downloaded_data_dir
+     for p in subdirs(cur, join=False):
+         patdir = join(cur, p)
+         patient_name = p
+         patient_names.append(patient_name)
+         t1 = join(patdir, p + "_t1.nii.gz")
+         t1c = join(patdir, p + "_t1ce.nii.gz")
+         t2 = join(patdir, p + "_t2.nii.gz")
+         flair = join(patdir, p + "_flair.nii.gz")
+         seg = join(patdir, p + "_seg.nii.gz")
+
+         assert all([
+             isfile(t1),
+             isfile(t1c),
+             isfile(t2),
+             isfile(flair),
+             isfile(seg)
+         ]), "%s" % patient_name
+
+         shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
+         shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
+         shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
+         shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))
+
+         copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))
+
+     json_dict = OrderedDict()
+     json_dict['name'] = "BraTS2020"
+     json_dict['description'] = "nothing"
+     json_dict['tensorImageSize'] = "4D"
+     json_dict['reference'] = "see BraTS2020"
+     json_dict['licence'] = "see BraTS2020 license"
+     json_dict['release'] = "0.0"
+     json_dict['modality'] = {
+         "0": "T1",
+         "1": "T1ce",
+         "2": "T2",
+         "3": "FLAIR"
+     }
+     json_dict['labels'] = {
+         "0": "background",
+         "1": "edema",
+         "2": "non-enhancing",
+         "3": "enhancing",
+     }
+     json_dict['numTraining'] = len(patient_names)
+     json_dict['numTest'] = 0
+     json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
+                              patient_names]
+     json_dict['test'] = []
+
+     save_json(json_dict, join(target_base, "dataset.json"))
+
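+     # resulting dataset.json, schematically (entry shown is illustrative; numTraining is
+     # filled in from len(patient_names)):
+     # {"name": "BraTS2020",
+     #  "modality": {"0": "T1", "1": "T1ce", "2": "T2", "3": "FLAIR"},
+     #  "labels": {"0": "background", "1": "edema", "2": "non-enhancing", "3": "enhancing"},
+     #  "numTraining": ..., "training": [{"image": "./imagesTr/<case>.nii.gz",
+     #                                    "label": "./labelsTr/<case>.nii.gz"}, ...]}
+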
+     if downloaded_data_dir_val is not None:
+         for p in subdirs(downloaded_data_dir_val, join=False):
+             patdir = join(downloaded_data_dir_val, p)
+             patient_name = p
+             t1 = join(patdir, p + "_t1.nii.gz")
+             t1c = join(patdir, p + "_t1ce.nii.gz")
+             t2 = join(patdir, p + "_t2.nii.gz")
+             flair = join(patdir, p + "_flair.nii.gz")
+
+             assert all([
+                 isfile(t1),
+                 isfile(t1c),
+                 isfile(t2),
+                 isfile(flair),
+             ]), "%s" % patient_name
+
+             shutil.copy(t1, join(target_imagesVal, patient_name + "_0000.nii.gz"))
+             shutil.copy(t1c, join(target_imagesVal, patient_name + "_0001.nii.gz"))
+             shutil.copy(t2, join(target_imagesVal, patient_name + "_0002.nii.gz"))
+             shutil.copy(flair, join(target_imagesVal, patient_name + "_0003.nii.gz"))
+
+     downloaded_data_dir_test = "/home/fabian/Downloads/MICCAI_BraTS2020_TestingData"
+
+     if isdir(downloaded_data_dir_test):
+         for p in subdirs(downloaded_data_dir_test, join=False):
+             patdir = join(downloaded_data_dir_test, p)
+             patient_name = p
+             t1 = join(patdir, p + "_t1.nii.gz")
+             t1c = join(patdir, p + "_t1ce.nii.gz")
+             t2 = join(patdir, p + "_t2.nii.gz")
+             flair = join(patdir, p + "_flair.nii.gz")
+
+             assert all([
+                 isfile(t1),
+                 isfile(t1c),
+                 isfile(t2),
+                 isfile(flair),
+             ]), "%s" % patient_name
+
+             shutil.copy(t1, join(target_imagesTs, patient_name + "_0000.nii.gz"))
+             shutil.copy(t1c, join(target_imagesTs, patient_name + "_0001.nii.gz"))
+             shutil.copy(t2, join(target_imagesTs, patient_name + "_0002.nii.gz"))
+             shutil.copy(flair, join(target_imagesTs, patient_name + "_0003.nii.gz"))
+
+     # test set
+     # nnUNet_ensemble -f nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold -o ensembled_nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold
+     # apply_threshold_to_folder('ensembled_nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold/', 'ensemble_PP200/', 200, 2)
+     # convert_labels_back_to_BraTS_2018_2019_convention('ensemble_PP200/', 'ensemble_PP200_converted')
+
+     # export for publication of weights
+     # nnUNet_export_model_to_zip -tr nnUNetTrainerV2BraTSRegions_DA4_BN -pl nnUNetPlansv2.1_bs5 -f 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 -t 82 -o nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold.zip --disable_strict
+     # nnUNet_export_model_to_zip -tr nnUNetTrainerV2BraTSRegions_DA3_BN_BD -pl nnUNetPlansv2.1_bs5 -f 0 1 2 3 4 -t 82 -o nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold.zip --disable_strict
+     # nnUNet_export_model_to_zip -tr nnUNetTrainerV2BraTSRegions_DA4_BN_BD -pl nnUNetPlansv2.1_bs5 -f 0 1 2 3 4 -t 82 -o nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold.zip --disable_strict