ho11laqe committed
Commit: ecf08bc
Parent: 75ea7e6
This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. HIP_Logo.png +0 -0
  2. LICENSE +201 -0
  3. RUN_CALVINGFRONT_DETECTION.sh +26 -0
  4. create_plots_new/Nofront.py +77 -0
  5. create_plots_new/SegmentationMetrics.py +65 -0
  6. create_plots_new/area_change.py +92 -0
  7. create_plots_new/canny_edge.py +157 -0
  8. create_plots_new/compute_significance.py +94 -0
  9. create_plots_new/create_train_gif.py +158 -0
  10. create_plots_new/dataset_timeline.py +56 -0
  11. create_plots_new/front_change.py +228 -0
  12. data_processing/data_postprocessing.py +323 -0
  13. documentation/common_problems_and_solutions.md +104 -0
  14. documentation/common_questions.md +201 -0
  15. documentation/data_format_inference.md +34 -0
  16. documentation/dataset_conversion.md +213 -0
  17. documentation/expected_epoch_times.md +173 -0
  18. documentation/extending_nnunet.md +119 -0
  19. documentation/inference_example_Prostate.md +78 -0
  20. documentation/setting_up_paths.md +84 -0
  21. documentation/training_example_Hippocampus.md +40 -0
  22. documentation/tutorials/custom_preprocessing.md +60 -0
  23. documentation/tutorials/custom_spacing.md +33 -0
  24. documentation/tutorials/edit_plans_files.md +141 -0
  25. documentation/using_nnUNet_as_baseline.md +4 -0
  26. evaluate_nnUNet.py +656 -0
  27. nnunet/__init__.py +7 -0
  28. nnunet/configuration.py +5 -0
  29. nnunet/dataset_conversion/Task017_BeyondCranialVaultAbdominalOrganSegmentation.py +94 -0
  30. nnunet/dataset_conversion/Task024_Promise2012.py +81 -0
  31. nnunet/dataset_conversion/Task027_AutomaticCardiacDetectionChallenge.py +106 -0
  32. nnunet/dataset_conversion/Task029_LiverTumorSegmentationChallenge.py +123 -0
  33. nnunet/dataset_conversion/Task032_BraTS_2018.py +176 -0
  34. nnunet/dataset_conversion/Task035_ISBI_MSLesionSegmentationChallenge.py +162 -0
  35. nnunet/dataset_conversion/Task037_038_Chaos_Challenge.py +460 -0
  36. nnunet/dataset_conversion/Task040_KiTS.py +240 -0
  37. nnunet/dataset_conversion/Task043_BraTS_2019.py +164 -0
  38. nnunet/dataset_conversion/Task055_SegTHOR.py +98 -0
  39. nnunet/dataset_conversion/Task056_VerSe2019.py +274 -0
  40. nnunet/dataset_conversion/Task056_Verse_normalize_orientation.py +98 -0
  41. nnunet/dataset_conversion/Task058_ISBI_EM_SEG.py +105 -0
  42. nnunet/dataset_conversion/Task059_EPFL_EM_MITO_SEG.py +99 -0
  43. nnunet/dataset_conversion/Task061_CREMI.py +146 -0
  44. nnunet/dataset_conversion/Task062_NIHPancreas.py +89 -0
  45. nnunet/dataset_conversion/Task064_KiTS_labelsFixed.py +84 -0
  46. nnunet/dataset_conversion/Task065_KiTS_NicksLabels.py +87 -0
  47. nnunet/dataset_conversion/Task069_CovidSeg.py +68 -0
  48. nnunet/dataset_conversion/Task075_Fluo_C3DH_A549_ManAndSim.py +137 -0
  49. nnunet/dataset_conversion/Task076_Fluo_N3DH_SIM.py +312 -0
  50. nnunet/dataset_conversion/Task082_BraTS_2020.py +751 -0
HIP_Logo.png ADDED
LICENSE ADDED
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [2019] [Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
RUN_CALVINGFRONT_DETECTION.sh ADDED
@@ -0,0 +1,26 @@
+ #!/bin/bash -l
+
+ while getopts ":m:d:" opt; do
+   case $opt in
+     m) model="$OPTARG";;
+     d) data="$OPTARG";;
+     *) echo "Unknown option: -$OPTARG"
+        exit 1;;
+   esac
+ done
+ # Point to the folder with the SAR images
+ export data_raw=$data
+
+ # Folders for processing
+ export nnUNet_raw_data_base=$data_raw'/data_nnUNet_preprocessed/NIFTI/'
+ export nnUNet_preprocessed=$data_raw'/data_nnUNet_preprocessed/'
+ export RESULTS_FOLDER=$data_raw'/calvingfronts/'
+
+ # Convert & Preprocess
+ python3 nnunet/dataset_conversion/Task500_Glacier_inference.py -data_percentage 100 -base $data_raw
+
+ # Inference
+ python3 nnunet/inference/predict_simple.py -i $nnUNet_raw_data_base'nnUNet_raw_data/Task500_Glacier_zonefronts/imagesTs/' -o $RESULTS_FOLDER/fold_0 -t 500 -m 2d -f 0 -p nnUNetPlansv2.1 -tr nnUNetTrainerV2 -model_folder_name $model
+
+ # Convert model output to PNG/TIF
+ python3 nnunet/dataset_conversion/Task500_Glacier_reverse.py -i $RESULTS_FOLDER'fold_0/'
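For reference, the two flags parsed by getopts above are the only inputs the pipeline expects; a typical invocation would presumably look like `bash RUN_CALVINGFRONT_DETECTION.sh -m <model_folder_name> -d /path/to/sar_data`, where `<model_folder_name>` is the trained nnU-Net model directory forwarded to `predict_simple.py` and `/path/to/sar_data` is the folder containing the SAR images (both values are placeholders).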
create_plots_new/Nofront.py ADDED
@@ -0,0 +1,77 @@
+ import numpy as np
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import plotly.io as pio
+ import os
+ pio.kaleido.scope.mathjax = None
+ import json
+
+
+ if __name__ == '__main__':
+     experiments = ['Task501_Glacier_front',
+                    'Task502_Glacier_zone',
+                    'Task503_Glacier_mtl_early',
+                    'Task503_Glacier_mtl_late',
+                    'Task505_Glacier_mtl_boundary',
+                    'Task500_Glacier_zonefronts']
+     data_dir = '/home/ho11laqe/Desktop/nnUNet_results/Final_Eval/'
+
+     nofront = {}
+     nozone = {}
+     for experiment in experiments:
+         no_front_exp_front = []
+         no_front_exp_zone = []
+         # nofront[experiment] = {'Front': [], 'Zone': []}
+         for fold in range(5):
+             results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'pngs', 'eval_results.json')
+             if not os.path.exists(results_json_path):
+                 results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'eval_results.json')
+
+             with open(results_json_path, 'r') as f:
+                 result = json.load(f)
+             if 'Front_Delineation' in result.keys():
+                 # no_front_exp_front.append(result['Front_Delineation']['Result_all']['Number_no_front'])
+                 no_front_exp_front.append(result['Front_Delineation']['Result_all']['mean'])
+             else:
+                 no_front_exp_front.append(0)
+             if 'Zone_Delineation' in result.keys():
+                 no_front_exp_zone.append(result['Zone_Delineation']['Result_all']['mean'])
+             else:
+                 no_front_exp_zone.append(0)
+
+         # nofront[experiment]['Front'] = no_front_exp_front
+         # nofront[experiment]['Zone'] = no_front_exp_zone
+         nofront[experiment] = no_front_exp_front
+         nozone[experiment] = no_front_exp_zone
+
+     box_width = 0.8
+     fig = px.box(None, points="all", template="plotly_white", width=1200, height=500)
+
+     fig.add_trace(go.Box(y=nofront['Task501_Glacier_front'], name='Front<br>STL', width=box_width,
+                          marker_color='CadetBlue', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nofront['Task503_Glacier_mtl_early'], name='Early Front <br>MTL', width=box_width,
+                          marker_color='YellowGreen', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nofront['Task503_Glacier_mtl_late'], name='Late Front <br>MTL', width=box_width,
+                          marker_color='#e1e400', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nofront['Task505_Glacier_mtl_boundary'], name='Boundary<br> Front MTL', width=box_width,
+                          marker_color='gold', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nofront['Task500_Glacier_zonefronts'], name='Fused Labels <br> Front', width=box_width,
+                          marker_color='orange', pointpos=0, boxpoints='all', boxmean=True))
+
+     fig.add_trace(go.Box(y=nozone['Task502_Glacier_zone'], name='Zone<br> STL', width=box_width,
+                          marker_color='LightBlue', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nozone['Task503_Glacier_mtl_early'], name='Early Zone <br>MTL', width=box_width,
+                          marker_color='YellowGreen', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nozone['Task503_Glacier_mtl_late'], name='Late Zone<br> MTL', width=box_width,
+                          marker_color='#e1e400', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nozone['Task505_Glacier_mtl_boundary'], name='Boundary <br>Zone MTL', width=box_width,
+                          marker_color='gold', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=nozone['Task500_Glacier_zonefronts'], name='Fused Labels <br> Zone', width=box_width,
+                          marker_color='orange', pointpos=0, boxpoints='all', boxmean=True))
+
+     fig.update_layout(showlegend=False, font=dict(family="Times New Roman", size=18))
+     fig.update_yaxes(title='Front delineation error (m)')
+     # fig.show()
+     fig.write_image("output/results.pdf", format='pdf')
create_plots_new/SegmentationMetrics.py ADDED
@@ -0,0 +1,65 @@
+ import numpy as np
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import plotly.io as pio
+ import os
+ pio.kaleido.scope.mathjax = None
+ import json
+
+
+ if __name__ == '__main__':
+     experiments = ['Task501_Glacier_front',
+                    'Task502_Glacier_zone',
+                    'Task503_Glacier_mtl_early',
+                    'Task503_Glacier_mtl_late',
+                    'Task505_Glacier_mtl_boundary',
+                    'Task500_Glacier_zonefronts_nodeep',
+                    'Task500_Glacier_zonefronts'
+                    ]
+     data_dir = '/home/ho11laqe/Desktop/nnUNet_results/Final_Eval/'
+
+     for metric in ['Precision', 'Recall', 'F1', 'IoU']:
+
+         zone_metric = {}
+         for experiment in experiments:
+
+             zone_metric_exp = []
+             # nofront[experiment] = {'Front': [], 'Zone': []}
+             for fold in range(5):
+                 # load json file with results
+                 results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'pngs', 'eval_results.json')
+                 if not os.path.exists(results_json_path):
+                     results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'eval_results.json')
+
+                 with open(results_json_path, 'r') as f:
+                     result = json.load(f)
+
+                 if 'Zone_Segmentation' in result.keys():
+                     avg_metric = 'Average_' + metric
+                     if metric == 'F1':
+                         avg_metric = 'Average_' + metric + ' Score'
+                     zone_metric_exp.append(result['Zone_Segmentation']['Zone_' + metric][avg_metric])
+                 else:
+                     zone_metric_exp.append(0)
+
+             zone_metric[experiment] = zone_metric_exp
+
+         box_width = 0.8
+         fig = px.box(None, points="all", template="plotly_white", width=700, height=500)
+
+         fig.add_trace(go.Box(y=zone_metric['Task502_Glacier_zone'], name='Zone<br> STL', width=box_width,
+                              line_color='LightBlue', pointpos=0, boxpoints='all', boxmean=True))
+         fig.add_trace(go.Box(y=zone_metric['Task503_Glacier_mtl_early'], name='Early Zone <br>MTL', width=box_width,
+                              line_color='YellowGreen', pointpos=0, boxpoints='all', boxmean=True))
+         fig.add_trace(go.Box(y=zone_metric['Task503_Glacier_mtl_late'], name='Late Zone<br> MTL', width=box_width,
+                              line_color='#e1e400', pointpos=0, boxpoints='all', boxmean=True))
+         fig.add_trace(go.Box(y=zone_metric['Task505_Glacier_mtl_boundary'], name='Boundary<br>Zone MTL', width=box_width,
+                              line_color='gold', pointpos=0, boxpoints='all', boxmean=True))
+         fig.add_trace(go.Box(y=zone_metric['Task500_Glacier_zonefronts'], name='Fused Labels<br>Front', width=box_width,
+                              line_color='orange', pointpos=0, boxpoints='all', boxmean=True))
+
+         fig.update_layout(showlegend=False, font=dict(family="Times New Roman", size=18))
+         fig.update_yaxes(title=metric)
+         # fig.show()
+         fig.write_image('output/' + metric + ".pdf", format='pdf')
create_plots_new/area_change.py ADDED
@@ -0,0 +1,92 @@
+ import cv2
+ import os
+ import plotly.express as px
+ import numpy as np
+ import pandas as pd
+ from plotly.subplots import make_subplots
+ import plotly.io as pio
+ pio.kaleido.scope.mathjax = None
+
+
+ def distribute_glacier(list_of_samples):
+     list_of_glaciers = {}
+     for glacier in ['COL', 'Mapple', 'Crane', 'Jorum', 'DBE', 'SI', 'JAC']:
+         list_of_glaciers[glacier] = [sample for sample in list_of_samples if glacier in sample]
+     return list_of_glaciers
+
+
+ if __name__ == '__main__':
+     generate_data = True
+     if generate_data:
+         # directories with zone label
+         train_dir = '/home/ho11laqe/PycharmProjects/data_raw/zones/train'
+         test_dir = '/home/ho11laqe/PycharmProjects/data_raw/zones/test'
+
+         list_of_train_samples = []
+         for sample in os.listdir(train_dir):
+             list_of_train_samples.append(os.path.join(train_dir, sample))
+
+         list_of_test_samples = []
+         for sample in os.listdir(test_dir):
+             list_of_test_samples.append(os.path.join(test_dir, sample))
+
+         list_of_samples = list_of_train_samples + list_of_test_samples
+
+         list_of_glacier = distribute_glacier(list_of_samples)
+
+         fig = make_subplots(rows=len(list_of_glacier.keys()), cols=1)
+         nan = []
+         rock = []
+         ice = []
+         ocean = []
+         date = []
+         glacier_name = []
+         for i, glacier in enumerate(list_of_glacier.keys()):
+
+             for sample in list_of_glacier[glacier]:
+                 print(sample)
+                 seg_mask = cv2.imread(sample, cv2.IMREAD_GRAYSCALE)
+                 all_pixel = seg_mask.shape[0] * seg_mask.shape[1]
+                 nan.append(np.count_nonzero(seg_mask == 0) / all_pixel * 100)
+                 rock.append(np.count_nonzero(seg_mask == 64) / all_pixel * 100)
+                 ice.append(np.count_nonzero(seg_mask == 127) / all_pixel * 100)
+                 ocean.append(np.count_nonzero(seg_mask == 254) / all_pixel * 100)
+
+                 sample_split = sample.split('_')
+                 date.append(sample_split[-6])
+                 glacier_name.append(glacier)
+
+         df = pd.DataFrame(dict(Shadow=nan, Rock=rock, Glacier=ice, Ocean=ocean, date=date, glacier_name=glacier_name))
+         df.to_csv('output/area.csv')
+
+     else:
+         df = pd.read_csv('output/area.csv')
+
+     df = df.drop_duplicates(subset=['date', 'glacier_name'])
+     area_plot = px.area(df,
+                         x="date",
+                         y=["Rock", "Shadow", "Glacier", "Ocean"],
+                         color_discrete_map={"Shadow": 'black', "Ocean": 'blue', "Glacier": "aliceblue", "Rock": "gray"},
+                         template="plotly_white",
+                         height=700,
+                         width=600,
+                         facet_row='glacier_name',
+                         # key must match the facet_row column, otherwise the ordering is silently ignored
+                         category_orders={'glacier_name': ['COL', 'Mapple', 'Crane', 'Jorum', 'DBE', 'SI', 'JAC']}
+                         )
+     area_plot.update_yaxes(type='linear', range=[0, 100], ticksuffix='%', title='area', side='right')
+     area_plot.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1], textangle=0, x=0, xanchor='right'))
+     area_plot.update_layout(legend=dict(title='Area:',
+                                         orientation="h",
+                                         yanchor="bottom",
+                                         y=1.02,
+                                         xanchor="right",
+                                         x=1,
+                                         font=dict(size=12)),
+                             margin=dict(l=70, r=0, t=0, b=0)
+                             )
+     area_plot.for_each_yaxis(lambda a: a.update(title=''))
+     area_plot.update_xaxes(title=' ', tickfont=dict(size=12))
+     area_plot.update_layout(font=dict(family="Times New Roman", size=10))
+     area_plot.update_annotations(font=dict(size=12))
+     area_plot.write_image("output/area.pdf", format='pdf')
+     # fig.show()
create_plots_new/canny_edge.py ADDED
@@ -0,0 +1,157 @@
+ from scipy import ndimage
+ from scipy.ndimage.filters import convolve
+
+ from scipy import misc
+ import numpy as np
+ import cv2
+
+
+ class cannyEdgeDetector:
+     def __init__(self, imgs, sigma=5, kernel_size=10, weak_pixel=75, strong_pixel=255, lowthreshold=0.05,
+                  highthreshold=0.15):
+         self.imgs = imgs
+         self.imgs_final = []
+         self.img_smoothed = None
+         self.gradientMat = None
+         self.thetaMat = None
+         self.nonMaxImg = None
+         self.thresholdImg = None
+         self.weak_pixel = weak_pixel
+         self.strong_pixel = strong_pixel
+         self.sigma = sigma
+         self.kernel_size = kernel_size
+         self.lowThreshold = lowthreshold
+         self.highThreshold = highthreshold
+
+     def gaussian_kernel(self, size, sigma=1):
+         size = int(size) // 2
+         x, y = np.mgrid[-size:size + 1, -size:size + 1]
+         normal = 1 / (2.0 * np.pi * sigma ** 2)
+         g = np.exp(-((x ** 2 + y ** 2) / (2.0 * sigma ** 2))) * normal
+         return g
+
+     def sobel_filters(self, img):
+         Kx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], np.float32)
+         Ky = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]], np.float32)
+
+         Ix = ndimage.filters.convolve(img, Kx)
+         Iy = ndimage.filters.convolve(img, Ky)
+
+         G = np.hypot(Ix, Iy)
+         G = G / G.max() * 255
+         theta = np.arctan2(Iy, Ix)
+         return (G, theta, Ix, Iy)
+
+     def non_max_suppression(self, img, D):
+         M, N = img.shape
+         Z = np.zeros((M, N), dtype=np.int32)
+         angle = D * 180. / np.pi
+         angle[angle < 0] += 180
+
+         for i in range(1, M - 1):
+             for j in range(1, N - 1):
+                 try:
+                     q = 255
+                     r = 255
+
+                     # angle 0
+                     if (0 <= angle[i, j] < 22.5) or (157.5 <= angle[i, j] <= 180):
+                         q = img[i, j + 1]
+                         r = img[i, j - 1]
+                     # angle 45
+                     elif 22.5 <= angle[i, j] < 67.5:
+                         q = img[i + 1, j - 1]
+                         r = img[i - 1, j + 1]
+                     # angle 90
+                     elif 67.5 <= angle[i, j] < 112.5:
+                         q = img[i + 1, j]
+                         r = img[i - 1, j]
+                     # angle 135
+                     elif 112.5 <= angle[i, j] < 157.5:
+                         q = img[i - 1, j - 1]
+                         r = img[i + 1, j + 1]
+
+                     if (img[i, j] >= q) and (img[i, j] >= r):
+                         Z[i, j] = img[i, j]
+                     else:
+                         Z[i, j] = 0
+
+                 except IndexError as e:
+                     pass
+
+         return Z
+
+     def threshold(self, img):
+
+         highThreshold = img.max() * self.highThreshold
+         lowThreshold = highThreshold * self.lowThreshold
+
+         M, N = img.shape
+         res = np.zeros((M, N), dtype=np.int32)
+
+         weak = np.int32(self.weak_pixel)
+         strong = np.int32(self.strong_pixel)
+
+         strong_i, strong_j = np.where(img >= highThreshold)
+         zeros_i, zeros_j = np.where(img < lowThreshold)
+
+         weak_i, weak_j = np.where((img <= highThreshold) & (img >= lowThreshold))
+
+         res[strong_i, strong_j] = strong
+         res[weak_i, weak_j] = weak
+
+         return res
+
+     def hysteresis(self, img):
+
+         M, N = img.shape
+         weak = self.weak_pixel
+         strong = self.strong_pixel
+
+         for i in range(1, M - 1):
+             for j in range(1, N - 1):
+                 if img[i, j] == weak:
+                     try:
+                         if ((img[i + 1, j - 1] == strong) or (img[i + 1, j] == strong) or (img[i + 1, j + 1] == strong)
+                                 or (img[i, j - 1] == strong) or (img[i, j + 1] == strong)
+                                 or (img[i - 1, j - 1] == strong) or (img[i - 1, j] == strong)
+                                 or (img[i - 1, j + 1] == strong)):
+                             img[i, j] = strong
+                         else:
+                             img[i, j] = 0
+                     except IndexError as e:
+                         pass
+
+         return img
+
+     def detect(self):
+         imgs_final = []
+         for i, img in enumerate(self.imgs):
+             cv2.imwrite('output/0img.png', img)
+             self.img_smoothed = convolve(img, self.gaussian_kernel(self.kernel_size, self.sigma))
+             self.img_smoothed = self.img_smoothed / np.max(self.img_smoothed) * 255
+             cv2.imwrite('output/1smoothed.png', self.img_smoothed)
+             self.gradientMat, self.thetaMat, Ix, Iy = self.sobel_filters(self.img_smoothed)
+             cv2.imwrite('output/2Ix.png', Ix)
+             cv2.imwrite('output/2Iy.png', Iy)
+             cv2.imwrite('output/4deltaI.png', self.gradientMat.astype(float))
+             cv2.imwrite('output/5theta.png', self.thetaMat.astype(float) / np.max(self.thetaMat) * 255)
+             self.nonMaxImg = self.non_max_suppression(self.gradientMat, self.thetaMat)
+             cv2.imwrite('output/6nonmax.png', self.nonMaxImg)
+             self.thresholdImg = self.threshold(self.nonMaxImg)
+             cv2.imwrite('output/7threshold.png', self.thresholdImg)
+             img_final = self.hysteresis(self.thresholdImg)
+             self.imgs_final.append(img_final)
+
+         return self.imgs_final
+
+
+ if __name__ == '__main__':
+     image_path = '/home/ho11laqe/PycharmProjects/data_raw/sar_images/test/Mapple_2011-06-02_TSX_7_1_110.png'
+
+     img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)[:1000, -1000:].astype(np.float32)
+     detector = cannyEdgeDetector([img], sigma=20)
+     edge = detector.detect()
+     cv2.imwrite('output/8edge.png', edge[0])
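The class above reimplements the textbook Canny pipeline (Gaussian smoothing, Sobel gradients, non-maximum suppression, double thresholding, hysteresis). The per-pixel Python loops in non_max_suppression and hysteresis are much slower than cv2.Canny, but this from-scratch version writes every intermediate stage to output/ as a numbered PNG, which appears to be the point of keeping it for the plots.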
create_plots_new/compute_significance.py ADDED
@@ -0,0 +1,94 @@
+ import numpy as np
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import plotly.io as pio
+ pio.kaleido.scope.mathjax = None
+ import os
+ import json
+
+
+ if __name__ == '__main__':
+
+     experiments = ['Task501_Glacier_front',
+                    'Task502_Glacier_zone',
+                    'Task503_Glacier_mtl_early',
+                    'Task503_Glacier_mtl_late',
+                    'Task505_Glacier_mtl_boundary',
+                    'Task500_Glacier_zonefronts']
+     data_dir = '/home/ho11laqe/Desktop/nnUNet_results/Final_Eval/'
+
+     zone_mean = {}
+     front_mean = {}
+     for experiment in experiments:
+         print(experiment)
+         zone_mean_exp = []
+         front_mean_exp = []
+         # nofront[experiment] = {'Front': [], 'Zone': []}
+         for fold in range(5):
+             # load json file with results
+             results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'pngs',
+                                              'eval_results.json')
+             if not os.path.exists(results_json_path):
+                 results_json_path = os.path.join(data_dir, experiment, 'fold_' + str(fold), 'eval_results.json')
+
+             with open(results_json_path, 'r') as f:
+                 result = json.load(f)
+
+             if 'Front_Delineation' in result.keys():
+                 front_mean_exp.append(result['Front_Delineation']['Result_all']['mean'])
+             else:
+                 front_mean_exp.append(0)
+
+             if 'Zone_Delineation' in result.keys():
+                 zone_mean_exp.append(result['Zone_Delineation']['Result_all']['mean'])
+             else:
+                 zone_mean_exp.append(0)
+
+         print(np.mean(zone_mean_exp), np.std(zone_mean_exp))
+         print(np.mean(front_mean_exp), np.std(front_mean_exp))
+         zone_mean[experiment] = zone_mean_exp
+         front_mean[experiment] = front_mean_exp
+
+     for exp1 in experiments:
+         for exp2 in experiments:
+             # FRONT
+             mean1 = np.mean(front_mean[exp1])
+             var1 = np.var(front_mean[exp1])
+             mean2 = np.mean(front_mean[exp2])
+             var2 = np.var(front_mean[exp2])
+
+             T_front = abs(mean1 - mean2) / np.sqrt((var1 / 5) + (var2 / 5))
+             print(exp1 + '<>' + exp2)
+             print('Tfront:' + str(T_front))
+
+             # Zone
+             mean1 = np.mean(zone_mean[exp1])
+             var1 = np.var(zone_mean[exp1])
+             mean2 = np.mean(zone_mean[exp2])
+             var2 = np.var(zone_mean[exp2])
+
+             T_zone = abs(mean1 - mean2) / np.sqrt((var1 / 5) + (var2 / 5))
+             print('Tzone:' + str(T_zone))
+             print('')
+     """
+     box_width = 0.8
+     fig = px.box(None, points="all", template="plotly_white", width=600, height=500)
+
+     fig.add_trace(go.Box(y=zone_mean['Task502_Glacier_zone'], name='Zone<br> STL', width=box_width,
+                          line_color='black', fillcolor='LightBlue', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(go.Box(y=zone_mean['Task503_Glacier_mtl_early'], name='Early Zone <br>MTL', width=box_width,
+                          line_color='black', fillcolor='YellowGreen', pointpos=0, boxpoints='all',
+                          boxmean=True))
+     fig.add_trace(go.Box(y=zone_mean['Task503_Glacier_mtl_late'], name='Late Zone<br> MTL', width=box_width,
+                          line_color='black', fillcolor='#e1e400', pointpos=0, boxpoints='all', boxmean=True))
+     fig.add_trace(
+         go.Box(y=zone_mean['Task505_Glacier_mtl_boundary'], name='Boundary <br>Zone MTL', width=box_width,
+                line_color='black', fillcolor='gold', pointpos=0, boxpoints='all', boxmean=True))
+
+     fig.update_layout(showlegend=False, font=dict(family="Times New Roman", size=18))
+     fig.update_yaxes(title='Front mean')
+     # fig.show()
+     fig.write_image('Front mean' + ".pdf", format='pdf')
+     """
create_plots_new/create_train_gif.py ADDED
@@ -0,0 +1,158 @@
+ import imageio
+ from skimage import io
+ import skimage
+
+ import os
+ from PIL import Image, ImageDraw, ImageFont, ImageOps
+ import copy
+
+ from datetime import date
+ import numpy as np
+ from argparse import ArgumentParser
+ from skimage.transform import resize
+ # import matplotlib.pyplot as plt
+ import cv2
+
+
+ def color_map(m):
+     return m[0] * np.array([1, 1, 1]) + (255 - m[0]) * np.array([0, 0, 1])
+
+
+ def createOverlay(image, front, zone, boundary):
+     """
+     Creates an image with the front label overlaying the glacier image.
+
+     :param image: Image of the glacier
+     :param front: Image of the label of the front
+     :param zone: Image of the zone label
+     :param boundary: Image of the boundary label (its use is commented out below)
+     :return: an rgb image with the black and white image and red front line
+     """
+
+     # value for NA area=0, stone=64, glacier=127, ocean with ice melange=254
+
+     image_rgb = np.array(image * 0.5, dtype=np.uint8)
+
+     try:
+         image_rgb[zone == 0] += np.array(np.array([0, 0, 0]) / 2, dtype=np.uint8)
+         image_rgb[zone == 64] += np.array(np.array([52, 46, 55]) / 2, dtype=np.uint8)
+         image_rgb[zone == 127] += np.array(np.array([254, 254, 254]) / 2, dtype=np.uint8)
+         image_rgb[zone == 254] += np.array(np.array([60, 145, 230]) / 2, dtype=np.uint8)
+
+     finally:
+         # try:
+         #     image_rgb[boundary > 0] = np.array(np.array([241, 143, 1]), dtype=np.uint8)
+         # finally:
+         image_rgb[front == 255] = np.array(np.array([255, 0, 0]), dtype=np.uint8)
+
+     return image_rgb
+
+
+ def create_target(sar_image_path):
+     sample_name = sar_image_path.split('/')[-1]
+     sar_image = cv2.imread(sar_image_path)
+     front_image_path = '/home/ho11laqe/PycharmProjects/data_raw/fronts_dilated_5/train/' + sample_name[:-len('.png')] + '_front.png'
+     zone_image_path = '/home/ho11laqe/PycharmProjects/data_raw/zones/train/' + sample_name[:-len('.png')] + '_zones.png'
+     boundary_image_path = '/home/ho11laqe/PycharmProjects/data_raw/boundaries_dilated_5/train/' + sample_name[:-len('.png')] + '_boundary.png'
+     front = cv2.imread(front_image_path, cv2.IMREAD_GRAYSCALE)
+     zone = cv2.imread(zone_image_path, cv2.IMREAD_GRAYSCALE)
+     boundary = cv2.imread(boundary_image_path, cv2.IMREAD_GRAYSCALE)
+     overlay = createOverlay(sar_image, front, zone, boundary)
+     cv2.imwrite('output/target.png', cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR))
+
+
+ if __name__ == '__main__':
+     parser = ArgumentParser(add_help=False)
+     parser.add_argument('--image_dir', help="Directory with predictions as png")
+     args = parser.parse_args()
+
+     image_dir = args.image_dir
+
+     front_gif = []
+     fronts = []
+     zone_gif = []
+     zones = []
+     boundary_gif = []
+     boundaries = []
+
+     sar_image_path = '/home/ho11laqe/PycharmProjects/data_raw/sar_images/train/DBE_2008-03-30_TSX_7_3_049.png'
+     sar_image = cv2.imread(sar_image_path)
+     shape = sar_image.shape
+     new_shape = (int(shape[1] / 4), int(shape[0] / 4))
+     sar_image = cv2.resize(sar_image, new_shape)
+
+     create_target(sar_image_path)
+
+     list_images = os.listdir(image_dir)
+     list_images.sort(key=lambda y: int(y.split('_')[6]))
+
+     for i, image_file in enumerate(list_images[:300]):
+         epoch = image_file.split('_')[6]
+         if image_file.endswith('_front.png'):
+             print(image_file)
+             front = cv2.imread(image_dir + '/' + image_file, cv2.IMREAD_GRAYSCALE)
+             front = cv2.resize(front, new_shape, interpolation=cv2.INTER_NEAREST)
+             # image = Image.fromarray(front)
+             # image_draw = ImageDraw.Draw(image)
+             # image_draw.text((1, 1), 'Epoch: ' + str(epoch))
+             # front_gif.append(image)
+             fronts.append(front)
+         elif image_file.endswith('_zone.png'):
+             print(image_file)
+             zone = cv2.imread(image_dir + '/' + image_file, cv2.IMREAD_GRAYSCALE)
+             zone = cv2.resize(zone, new_shape, interpolation=cv2.INTER_NEAREST)
+             # image = Image.fromarray(zone)
+             # image_draw = ImageDraw.Draw(image)
+             # image_draw.text((1, 1), 'Epoch: ' + str(epoch))
+             # zone_gif.append(image)
+             zones.append(zone)
+         elif image_file.endswith('_boundary.png'):
+             print(image_file)
+             boundary = cv2.imread(image_dir + '/' + image_file, cv2.IMREAD_GRAYSCALE)
+             boundary = cv2.resize(boundary, new_shape, interpolation=cv2.INTER_NEAREST)
+             # image = Image.fromarray(boundary)
+             # image_draw = ImageDraw.Draw(image)
+             # image_draw.text((1, 1), 'Epoch: ' + str(epoch))
+             # boundary_gif.append(image)
+             boundaries.append(boundary)
+
+     font = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf", 40)
+     font_legend = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf", 20)
+     overlay_gif = []
+     for epoch, (front, zone, boundary) in enumerate(zip(fronts, zones, boundaries)):
+         overlay = createOverlay(sar_image, front, zone, boundary)
+         image = Image.fromarray(overlay)
+         image_draw = ImageDraw.Draw(image)
+
+         image_draw.rectangle((0, 40, 195, 210), fill='gray')
+
+         image_draw.rectangle((10, 60, 30, 80), fill=(60, 145, 230, 120))
+         image_draw.text((35, 60), 'Ocean', font=font_legend)
+
+         image_draw.rectangle((10, 90, 30, 110), fill=(255, 255, 255))
+         image_draw.text((35, 90), 'Glacier', font=font_legend)
+
+         image_draw.rectangle((10, 120, 30, 140), fill=(255, 0, 0))
+         image_draw.text((35, 120), 'Glacier Front', font=font_legend)
+
+         image_draw.rectangle((10, 150, 30, 170), fill=(92, 76, 85))
+         image_draw.text((35, 150), 'Rock', font=font_legend)
+
+         image_draw.rectangle((10, 180, 30, 200), fill=(0, 0, 0))
+         image_draw.text((35, 180), 'Shadow', font=font_legend)
+
+         image_draw.rectangle((0, 0, 330, 45), fill='gray')
+         image_draw.text((8, 1), 'Epoch:%03i' % epoch + '/' + str(len(fronts)), font=font)
+         # repeat the earliest frames so the GIF lingers at the start
+         if epoch < 10:
+             for i in range(10 - epoch):
+                 print(i)
+                 overlay_gif.append(image)
+         else:
+             overlay_gif.append(image)
+
+     frame_one = overlay_gif[0]
+     frame_one.save("output/overlay.gif", format="GIF", append_images=overlay_gif,
+                    save_all=True, duration=200, loop=0)
@@ -0,0 +1,56 @@
 
+ import numpy as np
+ import os
+ import plotly.express as px
+ import plotly.figure_factory as ff
+ import datetime
+ import plotly.io as pio
+
+ pio.kaleido.scope.mathjax = None
+
+
+ def distribute_glacier(list_of_samples):
+     list_of_glaciers = {}
+     for glacier in ['COL', 'Mapple', 'Crane', 'Jorum', 'DBE', 'SI', 'JAC']:
+         list_of_glaciers[glacier] = [sample for sample in list_of_samples if glacier in sample]
+     return list_of_glaciers
+
+
+ def create_dict(list_of_samples):
+     list_dict = []
+     for sample in list_of_samples:
+         sample_split = sample.split('_')
+         finish_date = datetime.datetime.fromisoformat(sample_split[1]) + datetime.timedelta(days=50)
+         sample_dict = {
+             'Glacier': sample_split[0],
+             'Start': sample_split[1],
+             'Finish': str(finish_date),
+             'Satellite:': sample_split[2]
+         }
+         list_dict.append(sample_dict)
+     return list_dict
+
+
+ if __name__ == '__main__':
+     list_of_train_samples = os.listdir('/home/ho11laqe/PycharmProjects/data_raw/fronts/train')
+     list_of_test_samples = os.listdir('/home/ho11laqe/PycharmProjects/data_raw/fronts/test')
+     list_of_samples = list_of_train_samples + list_of_test_samples
+     list_of_glaciers = distribute_glacier(list_of_samples)
+     list_dict = create_dict(list_of_samples)
+
+     fig = px.timeline(list_dict, x_start='Start', x_end='Finish', color="Satellite:", y='Glacier',
+                       color_discrete_sequence=px.colors.qualitative.G10, template="plotly_white",
+                       height=300, category_orders={'Glacier': ['COL', 'Mapple', 'Crane', 'Jorum', 'DBE', 'SI', 'JAC'],
+                                                    'Satellite:': ['ERS', 'RSAT', 'ENVISAT', 'PALSAR', 'TSX', 'TDX',
+                                                                   'S1']})
+     fig.update_layout(legend=dict(
+         orientation="h",
+         yanchor="bottom",
+         y=1.02,
+         xanchor="right",
+         x=1,
+     ),
+         margin=dict(l=0, r=0, t=0, b=0))
+     fig.update_layout(
+         font=dict(family="Computer Modern", size=14))
+     fig.write_image("output/dataset_timeline.pdf", format='pdf')
+     # fig.show()
create_plots_new/front_change.py ADDED
@@ -0,0 +1,228 @@
+ import cv2
+
+ import numpy as np
+ import os
+ import plotly.express as px
+ import plotly.figure_factory as ff
+ import datetime
+ import plotly.io as pio
+ import plotly.graph_objs as go
+
+ pio.kaleido.scope.mathjax = None
+ import math
+ # import pylab
+ from matplotlib.colors import LinearSegmentedColormap
+ from PIL import ImageColor
+
+
+ def distribute_glacier(list_of_samples):
+     list_of_glaciers = {}
+     for glacier in ['JAC']:
+     # for glacier in ['COL', 'Mapple', 'Crane', 'Jorum', 'DBE', 'SI', 'JAC']:
+         list_of_glaciers[glacier] = [sample for sample in list_of_samples if glacier in sample]
+     return list_of_glaciers
+
+
+ def create_dict(list_of_samples):
+     list_dict = []
+     for sample in list_of_samples:
+         sample_split = sample.split('_')
+         finish_date = datetime.datetime.fromisoformat(sample_split[1]) + datetime.timedelta(days=50)
+         sample_dict = {
+             'Glacier': sample_split[0],
+             'Start': sample_split[1],
+             'Finish': str(finish_date),
+             'Satellite:': sample_split[2]
+         }
+         list_dict.append(sample_dict)
+     return list_dict
+
+
+ if __name__ == '__main__':
+     train_dir = '/home/ho11laqe/PycharmProjects/data_raw/fronts/train/'
+     test_dir = '/home/ho11laqe/PycharmProjects/data_raw/fronts/test/'
+
+     list_of_train_samples = os.listdir(train_dir)
+     list_of_test_samples = os.listdir(test_dir)
+     list_of_samples = list_of_train_samples + list_of_test_samples
+     list_of_glaciers = distribute_glacier(list_of_samples)
+     list_dict = create_dict(list_of_samples)
+
+     # define color map
+     colormap = px.colors.sequential.Reds[-1::-1]
+     for glacier in list_of_glaciers:
+         print(glacier)
+         list_of_glaciers[glacier].sort()
+
+         if glacier in ['COL', 'Mapple']:
+             data_directory = test_dir
+             image_directory = '/home/ho11laqe/PycharmProjects/data_raw/sar_images/test/'
+         else:
+             data_directory = train_dir
+             image_directory = '/home/ho11laqe/PycharmProjects/data_raw/sar_images/train/'
+
+         # define SAR background image
+         if glacier == 'COL':
+             canvas = cv2.imread(image_directory + 'COL_2011-11-13_TDX_7_1_092.png')
+             shape = canvas.shape
+
+         elif glacier == 'JAC':
+             canvas = cv2.imread(image_directory + 'JAC_2009-06-21_TSX_6_1_005.png')
+             shape = canvas.shape
+
+         elif glacier == 'Jorum':
+             canvas = cv2.imread(image_directory + 'Jorum_2011-09-04_TSX_7_4_034.png')
+             shape = canvas.shape
+
+         elif glacier == 'Mapple':
+             canvas = cv2.imread(image_directory + 'Mapple_2008-10-13_TSX_7_2_034.png')
+             shape = canvas.shape
+
+         elif glacier == 'SI':
+             canvas = cv2.imread(image_directory + 'SI_2013-08-14_TSX_7_1_125.png')
+
+         elif glacier == 'Crane':
+             canvas = cv2.imread(image_directory + 'Crane_2008-10-13_TSX_7_3_034.png')
+
+         elif glacier == 'DBE':
+             canvas = cv2.imread(image_directory + 'DBE_2008-03-30_TSX_7_3_049.png')
+
+         else:
+             print('No image for background')
+             exit()
+
+         number_images = len(list_of_glaciers[glacier])
+         kernel = np.ones((3, 3), np.uint8)
+
+         # iterate over all fronts of one glacier
+         for i, image_name in enumerate(list_of_glaciers[glacier]):
+             front = cv2.imread(data_directory + image_name)
+
+             # if the front label has to be resized to fit the background image,
+             # the front is not dilated
+             if front.shape != canvas.shape:
+                 front = cv2.resize(front, (shape[1], shape[0]))
+             else:
+                 front = cv2.dilate(front, kernel)
+
+             # color interpolation based on position in dataset
+             # TODO based on actual date
+             index = (1 - i / number_images) * (len(colormap) - 1)
+             up = math.ceil(index)
+             down = up - 1
+             color_up = np.array(ImageColor.getcolor(colormap[up], 'RGB'))
+             color_down = np.array(ImageColor.getcolor(colormap[down], 'RGB'))
+             # fractional distance to the upper colormap entry
+             # (the original `dif = up - down` is always 1, which would discard color_up)
+             dif = up - index
+             color = color_up * (1 - dif) + color_down * dif
+
+             # draw front on canvas
+             non_zeros = np.nonzero(front)
+             canvas[non_zeros[:2]] = np.uint([color for _ in non_zeros[0]])
+
+         # scale reference for font size
+         ref_x = 15000 / 7
+
+         if glacier == 'COL':
+             image = canvas[750:, 200:2800]
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(80 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.4, 4.4],
+                           ticktext=['2011<br>(+0.8°C)', '2020<br>(+1.2°C)'],
+                           outlinewidth=0)
+
+         elif glacier == 'Mapple':
+             image = canvas
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(150 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.8, 4.8], ticktext=['2006', '2020 '],
+                           outlinewidth=0)
+
+         elif glacier == 'Crane':
+             image = canvas[:2500, :]
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(150 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.8, 4.8], ticktext=['2002', '2014'],
+                           outlinewidth=0)
+
+         elif glacier == 'Jorum':
+             image = canvas  # [200:1600, 1500:]
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(240 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.8, 4.8], ticktext=['2003', '2020'],
+                           outlinewidth=0)
+
+         elif glacier == 'DBE':
+             image = canvas[700:, 750:]
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(150 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.7, 4.7], ticktext=['1995', '2014'],
+                           outlinewidth=0)
+
+         elif glacier == 'SI':
+             image = canvas
+             new_shape = image.shape
+             res = 7
+             scale = new_shape[0] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(240 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.8, 4.8], ticktext=['1995', '2014'],
+                           outlinewidth=0)
+
+         elif glacier == 'JAC':
+             image = canvas[:, :]
+             new_shape = image.shape
+             res = 6
+             scale = new_shape[1] / ref_x
+             fig = px.imshow(image, height=new_shape[0] - int(340 * scale), width=new_shape[1])
+             legend = dict(thickness=int(50 * scale), tickvals=[-4.6, 4.7],
+                           ticktext=['2009<br>(+0.7°C)', '2015<br>(+0.9°C)'],
+                           outlinewidth=0)
+         else:
+             fig = px.imshow(canvas)
+             res = 7
+             scale = 1
+
+         colorbar_trace = go.Scatter(x=[None],
+                                     y=[None],
+                                     mode='markers',
+                                     marker=dict(
+                                         colorscale=colormap[::-1],
+                                         showscale=True,
+                                         cmin=-5,
+                                         cmax=5,
+                                         colorbar=legend
+                                     ),
+                                     hoverinfo='none'
+                                     )
+         fig.update_layout(yaxis=dict(tickmode='array',
+                                      tickvals=[0, 5000 / res, 10000 / res, 15000 / res, 20000 / res, 25000 / res],
+                                      ticktext=[0, 5, 10, 15, 20, 25],
+                                      title='km'))
+         fig.update_layout(xaxis=dict(tickmode='array',
+                                      tickvals=[0, 5000 / res, 10000 / res, 15000 / res, 20000 / res, 25000 / res],
+                                      ticktext=[0, 5, 10, 15, 20, 25],
+                                      title='km'))
+
+         fig.update_xaxes(tickfont=dict(size=int(40 * scale)))
+         fig.update_yaxes(tickfont=dict(size=int(40 * scale)))
+         fig.update_layout(font=dict(size=int(60 * scale), family="Computer Modern"))
+         fig.update_coloraxes(colorbar_x=0)
+         fig['layout']['xaxis']['title']['font']['size'] = int(60 * scale)
+         fig['layout']['yaxis']['title']['font']['size'] = int(60 * scale)
+
+         fig['layout']['showlegend'] = False
+         fig.add_trace(colorbar_trace)
+         fig.write_image('output/' + glacier + "_front_change.pdf", format='pdf')
+         # fig.show()
data_processing/data_postprocessing.py ADDED
@@ -0,0 +1,323 @@
1
+ import numpy as np
2
+ import os
3
+ import re
4
+ import pickle
5
+ import cv2
6
+ from einops import rearrange
7
+ from scipy.ndimage.filters import gaussian_filter
8
+ import skimage.measure
9
+ import skimage.color
10
+ from skimage.morphology import skeletonize
11
+ from fil_finder import FilFinder2D
12
+ import astropy.units as u
13
+
14
+
15
+ # ################################################################################################################
16
+ # POSTPROCESSING PUTS THE PATCHES TOGETHER, SUBSTRACTS THE PADDING
17
+ # AND CHOOSES THE CLASS WITH HIGHEST PROBABILITY AS PREDICTION.
18
+ # SECONDLY, THE FRONT LINE IS EXTRACTED FROM THE PREDICTION
19
+ # ################################################################################################################
20
+
21
+
22
+ def is_subarray(subarray, arr):
23
+ """
24
+ Test whether subarray is a subset of arr
25
+ :param subarray: list of numbers
26
+ :param arr: list of numbers
27
+ :return: boolean
28
+ """
29
+ count = 0
30
+ for element in subarray:
31
+ if element in arr:
32
+ count += 1
33
+ if count == len(subarray):
34
+ return True
35
+ return False
36
+
37
+
38
+ def reconstruct_from_patches_and_binarize(src_directory, dst_directory, modality, threshold_front_prob):
39
+ """
40
+ Reconstruct the image from patches in src_directory and store them in dst_directory.
41
+ The src_directory contains masks (patches = number_of_classes x height x width).
42
+ The class with maximum probability will be chosen as prediction after averaging the probabilities across patches
43
+ (if there is an overlap) and the image in dst_directory will only show the prediction (image = height x width)
44
+ :param src_directory: source directory which contains pickled patches (class x height x width)
45
+ :param dst_directory: destination directory
46
+ :param modality: Either "fronts" or "zones"
47
+ :return: prediction (image = height x width)
48
+ """
49
+
50
+ assert modality == "fronts" or modality == "zones", "Modality must either be 'fronts' or 'zones'."
51
+
52
+ patches = os.listdir(src_directory)
53
+ list_of_names = []
54
+ for patch_name in patches:
55
+ list_of_names.append(os.path.split(patch_name)[1].split("__")[0])
56
+ image_names = set(list_of_names)
57
+ for name in image_names:
58
+ print(f"File: {name}")
59
+ # #####################################################################################################
60
+ # Search all patches that belong to the image with the name "name"
61
+ # #####################################################################################################
62
+ pattern = re.compile(name)
63
+ patches_for_image_names = [a for a in patches if pattern.match(a)]
64
+ assert len(patches_for_image_names) > 0, "No patches found for image " + name
65
+ patches_for_image = [] # Will be Number_Of_Patches x Number_Of_Classes x Height x Width
66
+ irow = []
67
+ icol = []
68
+ padded_bottom = int(patches_for_image_names[0][:-4].split("_")[-5])
69
+ padded_right = int(patches_for_image_names[0][:-4].split("_")[-4])
70
+
71
+ for file_name in patches_for_image_names:
72
+ # #####################################################################################################
73
+ # Get the origin of the patches out of their names
74
+ # #####################################################################################################
75
+ # naming convention: nameOfTheOriginalImage__PaddedBottom_PaddedRight_NumberOfPatch_irow_icol.png
76
+
77
+ # Mask patches are 3D arrays with class probabilities
78
+ with open(os.path.join(src_directory, file_name), "rb") as fp:
79
+ class_probabilities_array = pickle.load(fp)
80
+ assert class_probabilities_array.ndim == 3, "Patch " + file_name + " has not enough dimensions (3 needed). Found: " + str(class_probabilities_array.ndim)
81
+ if modality == "fronts":
82
+ assert len(class_probabilities_array) <= 2, "Patch " + file_name + " has too many classes (<=2 needed). Found: " + str(len(class_probabilities_array))
83
+ else:
84
+ assert len(class_probabilities_array) <= 4, "Patch " + file_name + " has too many classes (<=4 needed). Found: " + str(len(class_probabilities_array))
85
+ patches_for_image.append(class_probabilities_array)
86
+ irow.append(int(os.path.split(file_name)[1][:-4].split("_")[-2]))
87
+ icol.append(int(os.path.split(file_name)[1][:-4].split("_")[-1]))
88
+
89
+ # Images are masks and store the probabilities for each class (patch = number_class x height x width)
90
+ class_patches_for_image = []
91
+ patches_for_image = [np.array(x) for x in patches_for_image]
92
+ patches_for_image = np.array(patches_for_image)
93
+ for class_layer in range(len(patches_for_image[0])):
94
+ class_patches_for_image.append(patches_for_image[:, class_layer, :, :])
95
+
96
+ class_probabilities_complete_image = []
97
+
98
+ # #####################################################################################################
99
+ # Reconstruct image (with number of channels = classes) from patches
100
+ # #####################################################################################################
101
+ for class_number in range(len(class_patches_for_image)):
102
+ class_probability_complete_image, _ = reconstruct_from_grayscale_patches_with_origin(class_patches_for_image[class_number],
103
+ origin=(irow, icol), use_gaussian=True)
104
+ class_probabilities_complete_image.append(class_probability_complete_image)
105
+
106
+ ######################################################################################################
107
+ # Cut Padding
108
+ ######################################################################################################
109
+ if modality == "zones":
110
+ class_probabilities_complete_image = np.array(class_probabilities_complete_image)
111
+ # 'or None' keeps the full axis if the padding is 0 (':-0' would give an empty array)
+ class_probabilities_complete_image = class_probabilities_complete_image[:, :-padded_bottom or None, :-padded_right or None]
112
+ else:
113
+ class_probabilities_complete_image = rearrange(class_probabilities_complete_image, '1 h w -> h w')
114
+ class_probabilities_complete_image = np.array(class_probabilities_complete_image)
115
+ class_probabilities_complete_image = class_probabilities_complete_image[:-padded_bottom or None, :-padded_right or None]
116
+
117
+ # #####################################################################################################
118
+ # Get prediction from probabilities
119
+ # #####################################################################################################
120
+ if modality == "zones":
121
+ # Choose class with highest probability as prediction
122
+ prediction = np.argmax(class_probabilities_complete_image, axis=0)
123
+ else:
124
+ # Take a threshold to get the class
125
+ prediction = class_probabilities_complete_image
126
+ prediction[prediction > threshold_front_prob] = 1
127
+ prediction[prediction <= threshold_front_prob] = 0
128
+
129
+ # #####################################################################################################
130
+ # Convert [0, 1] to [0, 255] range
131
+ # #####################################################################################################
132
+ if modality == "fronts":
133
+ prediction[prediction == 0] = 0
134
+ prediction[prediction == 1] = 255
135
+ assert (is_subarray(np.unique(prediction), [0, 255])), "Unique front values are not correct"
136
+ else:
137
+ prediction[prediction == 0] = 0
138
+ prediction[prediction == 1] = 64
139
+ prediction[prediction == 2] = 127
140
+ prediction[prediction == 3] = 254
141
+ assert (is_subarray(np.unique(prediction), [0, 64, 127, 254])), "Unique zone values are not correct"
142
+
143
+ cv2.imwrite(os.path.join(dst_directory, name + '.png'), prediction)
144
+
145
+
146
+ def get_gaussian(patch_size, sigma_scale=1. / 8) -> np.ndarray:
147
+ """
148
+ Returns a Gaussian importance map the size of the patch, with sigma determined by sigma_scale
149
+ :param patch_size: The size of the image patches -> gaussian importance map will have the same size
150
+ :param sigma_scale: A scaling factor
151
+ :return: Gaussian importance map
152
+ """
153
+ tmp = np.zeros(patch_size)
154
+ center_coords = [i // 2 for i in patch_size]
155
+ sigmas = [i * sigma_scale for i in patch_size]
156
+ tmp[tuple(center_coords)] = 1
157
+ gaussian_importance_map = gaussian_filter(tmp, sigmas, 0, mode='constant', cval=0)
158
+ gaussian_importance_map = gaussian_importance_map / np.max(gaussian_importance_map)
159
+ gaussian_importance_map = gaussian_importance_map.astype(np.float32)
160
+
161
+ # gaussian_importance_map cannot be 0, otherwise we may end up with nans!
162
+ gaussian_importance_map[gaussian_importance_map == 0] = np.min(
163
+ gaussian_importance_map[gaussian_importance_map != 0])
164
+
165
+ return gaussian_importance_map
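+ # Illustrative note: get_gaussian((256, 256)) yields a 256x256 float32 map that peaks
+ # at 1.0 in the patch center and decays towards the borders, so patch centers dominate
+ # the averaging wherever patches overlap.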
166
+
167
+
168
+ def reconstruct_from_grayscale_patches_with_origin(patches, origin, use_gaussian, epsilon=1e-12):
169
+ """Rebuild an image from a set of patches by averaging. The reconstructed image will have different dimensions than
170
+ the original image if the strides and offsets of the patches were changed from the defaults!
171
+ Adopted from: http://jamesgregson.ca/extract-image-patches-in-python.html
172
+ :param patches: (ndarray) input patches as (N,patch_height,patch_width) array
173
+ :param origin: (2-tuple) = row index and column index coordinates of each patch
174
+ :param use_gaussian: Boolean to turn on Gaussian Importance Weighting
175
+ :param epsilon: (scalar) regularization term for the averaging in case some image pixels are not covered by any patch
176
+ :return image, weight
177
+ image (ndarray): output image reconstructed from patches of size (max(origin[0])+patches.shape[1], max(origin[1])+patches.shape[2])
178
+ weight (ndarray): output weight matrix consisting of the count of patches covering each pixel
179
+ """
180
+ patches = np.array(patches)
181
+ origin = np.array(origin)
182
+ patch_height = len(patches[0])
183
+ patch_width = len(patches[0][0])
184
+ img_height = np.max(origin[0]) + patch_height
185
+ img_width = np.max(origin[1]) + patch_width
186
+
187
+ out = np.zeros((img_height, img_width))
188
+ wgt = np.zeros((img_height, img_width))
189
+ if use_gaussian:
190
+ scale_wgt = get_gaussian((patch_height, patch_width))
191
+ else:
192
+ scale_wgt = np.ones((patch_height, patch_width))
193
+
194
+ for i in range(patch_height):
195
+ for j in range(patch_width):
196
+ out[origin[0]+i, origin[1]+j] += patches[:, i, j] * scale_wgt[i, j]
197
+ wgt[origin[0] + i, origin[1] + j] += scale_wgt[i, j]
198
+
199
+ return out / np.maximum(wgt, epsilon), wgt
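+ # Minimal usage sketch (values are made up for illustration): four overlapping
+ # 12x12 patches with origins on an 8-pixel grid reconstruct a 20x20 image.
+ # patches = np.random.rand(4, 12, 12)
+ # image, weight = reconstruct_from_grayscale_patches_with_origin(
+ #     patches, origin=([0, 0, 8, 8], [0, 8, 0, 8]), use_gaussian=True)
+ # image.shape -> (20, 20)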
200
+
201
+
202
+ def postprocess_zone_segmenation(mask):
203
+ """
204
+ Post-process the zone segmentation by filling gaps in the ocean region, clustering the ocean mask and removing all clusters except for the largest, so that a single connected ocean remains.
205
+ :param mask: a numpy array representing the segmentation mask with 1 channel
206
+ :return mask: a numpy array representing the filtered mask with 1 channel
207
+ """
208
+
209
+ # #############################################################################################
210
+ # Fill Gaps in Ocean
211
+ # #############################################################################################
212
+ # get inverted ocean mask
213
+ ocean_mask = mask == 254
214
+ ocean_mask = np.invert(ocean_mask)
215
+ labeled_image, num_cluster = skimage.measure.label(ocean_mask, connectivity=2, return_num=True)
216
+
217
+ # extract largest cluster
218
+ cluster_size = np.zeros(num_cluster + 1)
219
+ for cluster_label in range(1, num_cluster + 1):
220
+ cluster = labeled_image == cluster_label
221
+ cluster_size[cluster_label] = cluster.sum()
222
+
223
+ final_cluster = cluster_size.argmax()
224
+
225
+ # create map of the gaps in ocean area
226
+ gaps_mask = np.zeros_like(labeled_image)
227
+ gaps_mask[labeled_image >= 1] = 1
228
+ gaps_mask[labeled_image == final_cluster] = 0
229
+ # fill gaps
230
+ mask[gaps_mask == 1] = 254
231
+
232
+ # #############################################################################################
233
+ # Take largest connected component of ocean as ocean
234
+ # #############################################################################################
235
+ # Connected Component Analysis
236
+ ocean_mask = mask >= 254 # Ocean (254)
237
+ labeled_image, num_cluster = skimage.measure.label(ocean_mask, connectivity=2, return_num=True)
238
+ if num_cluster == 0:
239
+ return mask
240
+
241
+ # extract largest cluster
242
+ cluster_size = np.zeros(num_cluster + 1) # +1 for background
243
+ for cluster_label in range(1, num_cluster + 1): # +1 as range(x, y) is exclusive for y
244
+ cluster = labeled_image == cluster_label
245
+ cluster_size[cluster_label] = cluster.sum()
246
+
247
+ final_cluster = cluster_size.argmax()
248
+ final_mask = labeled_image == final_cluster
249
+
250
+ # overwrite small ocean cluster (254) with glacier value (127) (it is not important with what value these are
251
+ # filled, as these pixels are not at the boundary between ocean and glacier anymore and hence do not contribute to
252
+ # the front delineation)
253
+ mask[mask == 254] = 127
254
+ mask[final_mask] = 254
255
+
256
+ return mask
257
+
258
+
259
+ def extract_front_from_zones(zone_mask, front_length_threshold):
260
+ """
261
+ Extract the front prediction from the zone segmentation by choosing the boundary between glacier and ocean as the front and deleting fronts that are too short.
262
+ :param zone_mask: zone segmentation prediction
263
+ :param front_length_threshold: Threshold for the deletion of front predictions that are too short
264
+ :return: the front prediction
265
+ """
266
+ # detect edge between ocean and glacier
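+ # the differently padded copies below shift the mask by one pixel in each direction;
+ # combining them marks every ocean pixel (254) that has at least one glacier
+ # pixel (127) among its 4-neighbours as front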
267
+ mask_mi = np.pad(zone_mask, ((1, 1), (1, 1)), mode='constant')
268
+ mask_le = np.pad(zone_mask, ((1, 1), (0, 2)), mode='constant')
269
+ mask_ri = np.pad(zone_mask, ((1, 1), (2, 0)), mode='constant')
270
+ mask_do = np.pad(zone_mask, ((0, 2), (1, 1)), mode='constant')
271
+ mask_up = np.pad(zone_mask, ((2, 0), (1, 1)), mode='constant')
272
+
273
+ front = np.logical_and(mask_mi == 254, np.logical_or.reduce((mask_do == 127, mask_up == 127, mask_ri == 127, mask_le == 127)))
274
+ front = front[1:-1, 1:-1].astype(float)
275
+
276
+ # delete too short fronts
277
+ labeled_front, num_cluster = skimage.measure.label(front, connectivity=2, return_num=True)
278
+ if num_cluster == 0:
279
+ return front * 255
280
+
281
+ for cluster_label in range(1, num_cluster + 1): # +1 as range(x, y) is exclusive for y
282
+ cluster = labeled_front == cluster_label
283
+ cluster_size = cluster.sum()
284
+ if cluster_size <= front_length_threshold:
285
+ front[labeled_front == cluster_label] = 0
286
+ else:
287
+ front[labeled_front == cluster_label] = 1
288
+
289
+ front *= 255
290
+ return front
291
+
292
+
293
+ def postprocess_front_segmenation(complete_predicted_mask, threshold_front_length):
294
+ """
295
+ Post-process the front segmentation by skeletonization, filament extraction, and deletion of fronts that are too short
296
+ :param complete_predicted_mask: front segmentation prediction
297
+ :param threshold_front_length: Threshold for the deletion of front predictions that are too short
298
+ :return: the post-processed front prediction
299
+ """
300
+ if len(np.unique(complete_predicted_mask)) == 1:
301
+ print(f"No front predicted {np.unique(complete_predicted_mask)}")
302
+ return complete_predicted_mask
303
+ skeleton = skeletonize(complete_predicted_mask)
304
+ fil = FilFinder2D(skeleton, distance=None, mask=skeleton)
305
+ fil.preprocess_image(skip_flatten=True)
306
+ fil.create_mask(use_existing_mask=True)
307
+ fil.medskel(verbose=False)
308
+ fil.analyze_skeletons(skel_thresh=5 * u.pix)
309
+ # find longest path through the skeleton and delete all other branches
310
+ skeleton_longpaths = fil.skeleton_longpath
311
+ # delete fronts that are too short
312
+ labeled_skeleton_longpaths, num_cluster = skimage.measure.label(skeleton_longpaths, connectivity=2, return_num=True)
313
+ if num_cluster == 0:
314
+ return skeleton_longpaths
315
+
316
+ for cluster_label in range(1, num_cluster + 1): # +1 as range(x, y) is exclusive for y
317
+ cluster = labeled_skeleton_longpaths == cluster_label
318
+ cluster_size = cluster.sum()
319
+ if cluster_size <= threshold_front_length:
320
+ skeleton_longpaths[labeled_skeleton_longpaths == cluster_label] = 0
321
+ else:
322
+ skeleton_longpaths[labeled_skeleton_longpaths == cluster_label] = 1
323
+ return skeleton_longpaths
documentation/common_problems_and_solutions.md ADDED
@@ -0,0 +1,104 @@
1
+ # Common Issues and their Solutions
2
+
3
+ ## RuntimeError: Expected scalar type half but found float
4
+
5
+ This can happen when running inference (or training) with mixed precision enabled on older GPU hardware. It points
6
+ to some operation not being implemented in half precision for the type of GPU you are using. There are flags to enforce
7
+ the use of fp32 for both nnUNet_predict and nnUNet_train. If you run into this error, using these flags will probably
8
+ solve it. See `nnUNet_predict -h` and `nnUNet_train -h` for what the flags are.
9
+
10
+ ## nnU-Net gets 'stuck' during preprocessing, training or inference
11
+ nnU-Net uses python multiprocessing to leverage multiple CPU cores during preprocessing, background workers for data
12
+ augmentation in training, preprocessing of cases during inference as well as resampling and exporting the final
13
+ predictions during validation and inference. Unfortunately, python (or maybe it is just me as a programmer) is not
14
+ very good at communicating errors that happen in background workers, causing the main process to wait for
15
+ them to return indefinitely.
16
+
17
+ Whenever nnU-Net appears to be stuck, this is what you should do:
18
+
19
+ 1) There is almost always an error message which will give you an indication of what the problem is. This error message
20
+ is often not at the bottom of the text output, but further up. If you run nnU-Net on a GPU cluster (like we do) the
21
+ error message may be WAYYYY off in the log file, sometimes at the very start of the training/inference. Locate the
22
+ error message (if necessary copy the stdout to a text editor and search for 'error')
23
+
24
+ 2) If there is no error message, this could mean that your OS silently killed a background worker because it was about
25
+ to go out of memory. In this case, please rerun whatever command you have been running and closely monitor your system
26
+ RAM (not GPU memory!) usage. If your RAM is full or close to full, you need to take action:
27
+ - reduce the number of background workers: use `-tl` and `-tf` in `nnUNet_plan_and_preprocess` (you may have to
28
+ go as low as 1!). Reduce the number of workers used by `nnUNet_predict` by reducing `--num_threads_preprocessing` and
29
+ `--num_threads_nifti_save`.
30
+ - If even `-tf 1` during preprocessing is not low enough, consider adding a swap partition located on an SSD.
31
+ - upgrade your RAM! (32 GB should get the job done)
32
+
33
+
34
+ ## nnU-Net training: RuntimeError: CUDA out of memory
35
+
36
+ This section is dealing with error messages such as this:
37
+
38
+ ```
39
+ RuntimeError: CUDA out of memory. Tried to allocate 4.16 GiB (GPU 0; 10.76 GiB total capacity; 2.82 GiB already allocated; 4.18 GiB free; 4.33 GiB reserved in total by PyTorch)
40
+ ```
41
+
42
+ This message appears when the GPU memory is insufficient. For most datasets, nnU-Net uses about 8GB of video memory.
43
+ To ensure that you can run all trainings, we recommend to use a GPU with at least 11GB (this will have some headroom).
44
+ If you are running other programs on the GPU you intend to train on (for example the GUI of your operating system),
45
+ the amount of VRAM available to nnU-Net is less than whatever is on your GPU. Please close all unnecessary programs or
46
+ invest in a second GPU. We for example like to use a low cost GPU (GTX 1050 or slower) for the display outputs while
47
+ having the 2080 ti (or equivalent) handle the training.
48
+
49
+ At the start of each training, cuDNN will run some benchmarks in order to figure out the fastest convolution algorithm
50
+ for the current network architecture (we use `torch.backends.cudnn.benchmark=True`). VRAM consumption will jump all over
51
+ the place while these benchmarks run and can briefly exceed the 8GB nnU-Net typically requires. If you keep running into
52
+ `RuntimeError: CUDA out of memory` problems you may want to consider disabling that. You can do so by setting the
53
+ `--deterministic` flag when using `nnUNet_train`. Setting this flag can slow down your training, so it is recommended
54
+ to only use it if necessary.
55
+
56
+ ## nnU-Net training in Docker container: RuntimeError: unable to write to file </torch_781_2606105346>
57
+
58
+ Nvidia NGC (https://ngc.nvidia.com/catalog/containers/nvidia:pytorch) is a great place to find Docker containers with
59
+ the most recent software (pytorch, cuDNN, etc.) in them. When starting Docker containers with the command provided on the
60
+ Nvidia website, the docker will crash with errors like this when running nnU-Net: `RuntimeError: unable to write to
61
+ file </torch_781_2606105346>`. Please start the docker with the `--ipc=host` flag to solve this.
62
+
63
+ ## Downloading pretrained models: unzip: cannot find zipfile directory in one of /home/isensee/.nnunetdownload_16031094034174126
64
+
65
+ Sometimes downloading the large zip files containing our pretrained models can fail and cause the error above. Please
66
+ make sure to use the most recent nnU-Net version (we constantly try to improve the downloading). If that does not fix it
67
+ you can always download the zip file from our zenodo (https://zenodo.org/record/4003545) and use the
68
+ `nnUNet_install_pretrained_model_from_zip` command to install the model.
69
+
70
+ ## Downloading pre-trained models: `unzip: 'unzip' is not recognized as an internal or external command` OR `Command 'unzip' not found`
71
+
72
+ On Windows systems and on a bare WSL2 system, the `unzip` command may not be present.
73
+ Either install it, unzip the pre-trained model from the zenodo download manually, or update to a newer version of nnU-Net that uses the Python built-in
74
+ zipfile module (https://docs.python.org/3/library/zipfile.html).
75
+
76
+ ## nnU-Net training (2D U-Net): High (and increasing) system RAM usage, OOM
77
+
78
+ There was an issue with mixed precision causing a system RAM memory leak. This is fixed when using cuDNN 8.0.2 or newer,
79
+ but the current pytorch master comes with cuDNN 7.6.5. If you encounter this problem, please consider using Nvidias NGC
80
+ pytorch container for training (the pytorch it comes with has a recent cuDNN version). You can also install the new
81
+ cuDNN version on your system and compile pytorch yourself (instructions on the pytorch website!). This is what we do at DKFZ.
82
+
83
+
84
+ ## nnU-Net training of cascade: Error `seg from prev stage missing`
85
+ You need to run all five folds of `3d_lowres`. Segmentations of the previous stage can only be generated from the
86
+ validation set, otherwise we would overfit.
87
+
88
+ ## nnU-Net training: `RuntimeError: CUDA error: device-side assert triggered`
89
+ This error often goes along with something like `void THCudaTensor_scatterFillKernel(TensorInfo<Real, IndexType>,
90
+ TensorInfo<long, IndexType>, Real, int, IndexType) [with IndexType = unsigned int, Real = float, Dims = -1]:
91
+ block: [4770,0,0], thread: [374,0,0] Assertion indexValue >= 0 && indexValue < tensor.sizes[dim] failed.`.
92
+
93
+ This means that your dataset contains unexpected values in the segmentations. nnU-Net expects all labels to be
94
+ consecutive integers. So if your dataset has 4 classes (background and three foreground labels), then the labels
95
+ must be 0, 1, 2, 3 (where 0 must be background!). There cannot be any other values in the ground truth segmentations.
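+ 
+ A quick way to inspect the label values yourself (a minimal sketch; the file name is a placeholder):
+ 
+ ```python
+ import numpy as np
+ import SimpleITK as sitk
+ 
+ seg = sitk.GetArrayFromImage(sitk.ReadImage('labelsTr/case_001.nii.gz'))
+ print(np.unique(seg))  # must print consecutive integers starting at 0, e.g. [0 1 2 3]
+ ```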
96
+
97
+ If you run `nnUNet_plan_and_preprocess` with the `--verify_dataset_integrity` option, this should never happen because
98
+ it will check for wrong values in the label images.
99
+
100
+ ## nnU-Net training: Error: mmap length is greater than file size and EOFError
101
+ Please delete all .npy files in the nnUNet_preprocessed folder of the task you were trying to train. Then try again.
102
+
103
+ ## running nnU-Net on Azure instances
104
+ see https://github.com/MIC-DKFZ/nnUNet/issues/437, thank you @Alaska47
documentation/common_questions.md ADDED
@@ -0,0 +1,201 @@
1
+ # FAQ
2
+
3
+ ## Where can I find the segmentation metrics of my experiments?
4
+ **Results for the validation sets of each fold** are stored in the respective output folder after the training is completed. For example, this could be
5
+ `${RESULTS_FOLDER}/nnUNet/3d_fullres/Task003_Liver/nnUNetTrainerV2__nnUNetPlansv2.1/fold_0`. After training there will
6
+ be a `validation_raw` subfolder and a `validation_raw_postprocessed` subfolder. In each of these folders is going to
7
+ be a `summary.json` file with the segmentation metrics. There are metrics for each individual validation case and then
8
+ at the bottom there is also a mean across all cases.
9
+
10
+ **Cross-validation metrics** can only be computed after all five folds were run. You first need to run
11
+ `nnUNet_determine_postprocessing` (see `nnUNet_determine_postprocessing -h` for help). This will collect the
12
+ predictions from the validation sets of the five folds, compute metrics on them and then determine the postprocessing.
13
+ Once this is all done, there will be new folders located in the output directory (for example
14
+ `${RESULTS_FOLDER}/nnUNet/3d_fullres/Task003_Liver/nnUNetTrainerV2__nnUNetPlansv2.1/`): `cv_niftis_raw` (raw predictions
15
+ from the cross-validation) and `cv_niftis_postprocessed` (postprocessed predictions). In each of these folders is
16
+ going to be a `summary.json` file with the metrics (see above).
17
+
18
+ Note that the postprocessing determined on each individual fold is completely ignored by nnU-Net because it needs to
19
+ find a single postprocessing configuration for the whole cross-validation. The postprocessed results in each fold are
20
+ just for development purposes!
21
+
22
+ **Test set results** see [here](#evaluating-test-set-results).
23
+
24
+ **Ensemble performance** will be accessible here `${RESULTS_FOLDER}/nnUNet/ensembles/TASKNAME` after you ran
25
+ `nnUNet_find_best_configuration`. There are summary.csv for a quick overview and then there is also going to be
26
+ detailed results in the form of `summary.json` in the respective subfolders.
27
+
28
+ ## What postprocessing is selected?
29
+ After you run `nnUNet_determine_postprocessing` (see `nnUNet_determine_postprocessing -h` for help) there will be a
30
+ `postprocessing.json` file located in the output directory of your training (for example
31
+ `${RESULTS_FOLDER}/nnUNet/3d_fullres/Task003_Liver/nnUNetTrainerV2__nnUNetPlansv2.1/`). If you open this with a text
32
+ editor, there is a key "for_which_classes", followed by some list. For LiTS (classes 0: bg, 1: liver, 2: tumor)
33
+ this can for example be:
34
+ ```python
35
+ "for_which_classes": [
36
+ [
37
+ 1,
38
+ 2
39
+ ],
40
+ 1
+ ]
41
+ ```
42
+ This means that nnU-Net will first remove all but the largest components for the merged object consisting of classes
43
+ 1 and 2 (essentially the liver including the tumors) and then in a second step also remove all but the largest
44
+ connected component for the liver class.
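+ 
+ For example, a minimal sketch for inspecting the file (the path is a placeholder):
+ 
+ ```python
+ import json
+ 
+ with open('postprocessing.json') as f:
+     pp = json.load(f)
+ print(pp['for_which_classes'])  # e.g. [[1, 2], 1] for LiTS
+ ```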
45
+
46
+ Note that you do not have to run `nnUNet_determine_postprocessing` if you use `nnUNet_find_best_configuration`.
47
+ `nnUNet_find_best_configuration` will do that for you.
48
+
49
+ Ensemble results and postprocessing will be stored in `${RESULTS_FOLDER}/nnUNet/ensembles`
50
+ (this will all be generated by `nnUNet_find_best_configuration`).
51
+
52
+ ## Evaluating test set results
53
+ This feature was only added recently. Please run `pip install --upgrade nnunet` or reinstall nnunet from the master.
54
+
55
+ You can now use `nnUNet_evaluate_folder` to compute metrics on predicted test cases. For example:
56
+
57
+ ```
58
+ nnUNet_evaluate_folder -ref FOLDER_WITH_GT -pred FOLDER_WITH_PREDICTIONS -l 1 2 3 4
59
+ ```
60
+
61
+ This example is for a dataset that has 4 foreground classes (labels 1, 2, 3, 4). `FOLDER_WITH_GT` and
62
+ `FOLDER_WITH_PREDICTIONS` must contain files with the same names containing the reference and predicted segmentations
63
+ of each case, respectively. The files must be nifti (end with .nii.gz).
64
+
65
+ ## Creating and managing data splits
66
+
67
+ At the start of each training, nnU-Net will check whether the splits_final.pkl file is present in the directory where
68
+ the preprocessed data of the requested dataset is located. If the file is not present, nnU-Net will create its own
69
+ split: a five-fold cross-validation using all the available training cases. nnU-Net needs this five-fold
70
+ cross-validation to be able to determine the postprocessing and to run model/ensemble selection.
71
+
72
+ There are however situations in which you may want to create your own split, for example
73
+ - in datasets like ACDC where several training cases are connected (there are two time steps for each patient) you
74
+ may need to manually create splits to ensure proper stratification.
75
+ - cases are annotated by multiple annotators and you would like to use the annotations as separate training examples
76
+ - if you are running experiments with a domain transfer, you might want to train only on cases from domain A and
77
+ validate on domain B
78
+ - ...
79
+
80
+ Creating your own data split is simple: the splits_final.pkl file contains the following data structure (assume there are five training cases A, B, C, D, and E):
81
+ ```python
82
+ splits = [
83
+ {'train': ['A', 'B', 'C', 'D'], 'val': ['E']},
84
+ {'train': ['A', 'B', 'C', 'E'], 'val': ['D']},
85
+ {'train': ['A', 'B', 'D', 'E'], 'val': ['C']},
86
+ {'train': ['A', 'C', 'D', 'E'], 'val': ['B']},
87
+ {'train': ['B', 'C', 'D', 'E'], 'val': ['A']}
88
+ ]
89
+ ```
90
+
91
+ Use load_pickle and save_pickle from batchgenerators.utilities.file_and_folder_operations for loading/storing the splits.
92
+
93
+ Splits is a list of length NUMBER_OF_FOLDS. Each entry in the list is a dict, with 'train' and 'val' as keys and lists
94
+ of the corresponding case names (without the _0000 etc!) as values.
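+ 
+ For example, a minimal sketch for writing a custom two-fold split (the case names are placeholders):
+ 
+ ```python
+ from batchgenerators.utilities.file_and_folder_operations import save_pickle
+ 
+ splits = [
+     {'train': ['caseA', 'caseB', 'caseC'], 'val': ['caseD']},
+     {'train': ['caseA', 'caseB', 'caseD'], 'val': ['caseC']},
+ ]
+ save_pickle(splits, 'splits_final.pkl')  # place this in the preprocessed folder of your task
+ ```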
95
+
96
+ nnU-Net's five-fold cross validation will always create a list of len(splits)=5. But you can do whatever you want. Note
97
+ that if you define only 4 splits (fold 0-3) and then set fold=4 when training (that would be the fifth split),
98
+ nnU-Net will print a warning and proceed to use a random 80:20 data split.
99
+
100
+ ## How can I swap component XXX (for example the loss) of nnU-Net?
101
+
102
+ All changes in nnU-Net are handled the same way:
103
+
104
+ 1) create a new nnU-Net trainer class. Place the file somewhere in the nnunet.training.network_training folder
105
+ (any subfolder will do. If you create a new subfolder, make sure to include an empty `__init__.py` file!)
106
+
107
+ 2) make your new trainer class derive from the trainer you would like to change (most likely this is going to be nnUNetTrainerV2)
108
+
109
+ 3) identify the function that you need to overwrite. You may have to go up the inheritance hierarchy to find it!
110
+
111
+ 4) overwrite that function in your custom trainer, make sure whatever you do is compatible with the rest of nnU-Net
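+ 
+ As a minimal sketch of these four steps, here is how a trainer that swaps the loss could look (the class name is made up; `nnUNetTrainerV2` and `DC_and_CE_loss` are part of nnU-Net, but treat the exact kwargs as an assumption):
+ 
+ ```python
+ from nnunet.training.network_training.nnUNetTrainerV2 import nnUNetTrainerV2
+ from nnunet.training.loss_functions.dice_loss import DC_and_CE_loss
+ 
+ 
+ class nnUNetTrainerV2_MyLoss(nnUNetTrainerV2):
+     def initialize(self, training=True, force_load_plans=False):
+         # let the default initialization build the network, optimizer, dataloaders, ...
+         super().initialize(training, force_load_plans)
+         # ... then swap in the loss you want to experiment with
+         self.loss = DC_and_CE_loss({'batch_dice': self.batch_dice, 'smooth': 1e-5, 'do_bg': False}, {})
+ ```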
112
+
113
+ What these changes need to look like specifically is hard to say without knowing what you are exactly trying to do.
114
+ Before you open a new issue on GitHub, please have a look around the `nnunet.training.network_training` folder first!
115
+ There are tons of examples modifying various parts of the pipeline.
116
+
117
+ Also see [here](extending_nnunet.md)
118
+
119
+ ## How does nnU-Net handle multi-modal images?
120
+
121
+ Multi-modal images are treated as color channels. BraTS, which comes with T1, T1c, T2 and Flair images for each
122
+ training case will thus for example have 4 input channels.
123
+
124
+ ## Why does nnU-Net not use all my GPU memory?
125
+
126
+ nnU-net and all its parameters are optimized for a training setting that uses about 8GB of VRAM for a network training.
127
+ Using more VRAM will not speed up the training. Using more VRAM has also not (yet) been beneficial for model
128
+ performance consistently enough to make that the default. If you really want to train with more VRAM, you can do one of these things:
129
+
130
+ 1) Manually edit the plans files to increase the batch size. A larger batch size gives better (less noisy) gradients
131
+ and may improve your model performance if the dataset is large. Note that nnU-Net always runs for 1000 epochs with 250
132
+ iterations each (250000 iterations). The training time thus scales approximately linearly with the batch size
133
+ (batch size 4 is going to need twice as long for training than batch size 2!)
134
+
135
+ 2) Manually edit the plans files to increase the patch size. This one is tricky and should only be attempted if you
136
+ know what you are doing! Again, training times will be increased if you do this! Option 3 below is a better way of increasing the
137
+ patch size.
138
+
139
+ 3) Run `nnUNet_plan_and_preprocess` with a larger GPU memory budget. This will make nnU-Net plan for larger patch sizes
140
+ during experiment planning. Doing this can change the patch size, network topology, the batch size as well as the
141
+ presence of the U-Net cascade. To run with a different memory budget, you need to specify a different experiment planner, for example
142
+ `nnUNet_plan_and_preprocess -t TASK_ID -pl2d None -pl3d ExperimentPlanner3D_v21_32GB` (note that `-pl2d None` will
143
+ disable 2D U-Net configuration. There is currently no planner for larger 2D U-Nets). We have planners for 8 GB (default),
144
+ 11GB and 32GB available. If you need a planner for a different GPU size, you should be able to quickly hack together
145
+ your own using the code of the 11GB or 32GB planner (same goes for a 2D planner). Note that we have experimented with
146
+ these planners and not found an increase in segmentation performance as a result of using them. Training times are
147
+ again longer than with the default.
148
+
149
+ ## Do I need to always run all U-Net configurations?
150
+ The model training pipeline above is for challenge participations. Depending on your task you may not want to train all
151
+ U-Net models and you may also not want to run a cross-validation all the time.
152
+ Here are some recommendations about what U-Net model to train:
153
+ - It is safe to say that on average, the 3D U-Net model (3d_fullres) was most robust. If you just want to use nnU-Net because you
154
+ need segmentations, I recommend you start with this.
155
+ - If you are not happy with the results from the 3D U-Net then you can try the following:
156
+ - if your cases are very large so that the patch size of the 3d U-Net only covers a very small fraction of an image then
157
+ it is possible that the 3d U-Net cannot capture sufficient contextual information in order to be effective. If this
158
+ is the case, you should consider running the 3d U-Net cascade (3d_lowres followed by 3d_cascade_fullres)
159
+ - If your data is very anisotropic then a 2D U-Net may actually be a better choice (Promise12, ACDC, Task05_Prostate
160
+ from the decathlon are examples for anisotropic data)
161
+
162
+ You do not have to run five-fold cross-validation all the time. If you want to test single model performance, use
163
+ *all* for `FOLD` instead of a number. Note that this will then not give you an estimate of your performance on the
164
+ training set. You will also not be able to automatically identify which ensembling should be used and nnU-Net will
165
+ not be able to configure a postprocessing.
166
+
167
+ CAREFUL: DO NOT use fold=all when you intend to run the cascade! You must run the cross-validation in 3d_lowres so
168
+ that you get proper (=not overfitted) low resolution predictions.
169
+
170
+ ## Sharing Models
171
+ You can share trained models by simply sending the corresponding output folder from `RESULTS_FOLDER/nnUNet` to
172
+ whoever you want share them with. The recipient can then use nnU-Net for inference with this model.
173
+
174
+ You can now also use `nnUNet_export_model_to_zip` to export a trained model (or models) to a zip file. The recipient
175
+ can then use `nnUNet_install_pretrained_model_from_zip` to install the model from this zip file.
176
+
177
+ ## Can I run nnU-Net on smaller GPUs?
178
+ nnU-Net is guaranteed to run on GPUs with 11GB of memory. Many configurations may also run on 8 GB.
179
+ If you have an 11GB and there is still an `Out of Memory` error, please read 'nnU-Net training: RuntimeError: CUDA out of memory' [here](common_problems_and_solutions.md).
180
+
181
+ If you wish to configure nnU-Net to use a different amount of GPU memory, simply adapt the reference value for the GPU memory estimation
182
+ accordingly (with some slack because the whole thing is not an exact science!). For example, in
183
+ [experiment_planner_baseline_3DUNet_v21_11GB.py](nnunet/experiment_planning/experiment_planner_baseline_3DUNet_v21_11GB.py)
184
+ we provide an example that attempts to maximise the usage of GPU memory on 11GB (as opposed to the default, which leaves
185
+ much more headroom). This is simply achieved by this line:
186
+
187
+ ```python
188
+ ref = Generic_UNet.use_this_for_batch_size_computation_3D * 11 / 8
189
+ ```
190
+
191
+ with 8 being what is currently used (approximately) and 11 being the target. Should you get CUDA out of memory
192
+ issues, simply reduce the reference value. You should do this adaptation as part of a separate ExperimentPlanner class.
193
+ Please read the instructions [here](extending_nnunet.md).
194
+
195
+
196
+ ## Why is no 3d_lowres model created?
197
+ 3d_lowres is created only if the patch size in 3d_fullres covers less than 1/8 of the voxels of the median shape of the data
198
+ in 3d_fullres (for example Liver is about 512x512x512 and the patch size is 128x128x128, so that's 1/64 and thus
199
+ 3d_lowres is created). You can enforce the creation of 3d_lowres models for smaller datasets by changing the value of
200
+ `HOW_MUCH_OF_A_PATIENT_MUST_THE_NETWORK_SEE_AT_STAGE0` (located in experiment_planning.configuration).
201
+
documentation/data_format_inference.md ADDED
@@ -0,0 +1,34 @@
1
+ # Data format for Inference
2
+
3
+ The data format for inference must match the one used for the raw data (specifically, the images must be in exactly
4
+ the same format as in the imagesTr folder). As before, the filenames must start with a
5
+ unique identifier, followed by a 4-digit modality identifier. Here is an example for two different datasets:
6
+
7
+ 1) Task005_Prostate:
8
+
9
+ This task has 2 modalities, so the files in the input folder must look like this:
10
+
11
+ input_folder
12
+ ├── prostate_03_0000.nii.gz
13
+ ├── prostate_03_0001.nii.gz
14
+ ├── prostate_05_0000.nii.gz
15
+ ├── prostate_05_0001.nii.gz
16
+ ├── prostate_08_0000.nii.gz
17
+ ├── prostate_08_0001.nii.gz
18
+ ├── ...
19
+
20
+ _0000 is always the T2 image and _0001 is always the ADC image (as specified by 'modality' in the dataset.json)
21
+
22
+ 2) Task002_Heart:
23
+
24
+ imagesTs
25
+ ├── la_001_0000.nii.gz
26
+ ├── la_002_0000.nii.gz
27
+ ├── la_006_0000.nii.gz
28
+ ├── ...
29
+
30
+ Task002 only has one modality, so each case only has one _0000.nii.gz file.
31
+
32
+
33
+ The segmentations in the output folder will be named IDENTIFIER.nii.gz (omitting the modality identifier).
34
+
documentation/dataset_conversion.md ADDED
@@ -0,0 +1,213 @@
1
+ # Dataset conversion instructions
2
+ nnU-Net requires the raw data to be brought into a specific format so that it knows how to read and interpret it. This
3
+ format closely, but not entirely, follows the format used by the
4
+ [Medical Segmentation Decathlon](http://medicaldecathlon.com/) (MSD).
5
+
6
+ The entry point to nnU-Net is the nnUNet_raw_data_base folder (which the user needs to specify when installing nnU-Net!).
7
+ Each segmentation dataset is stored as a separate 'Task'. Tasks are associated with a task ID, a three digit integer
8
+ (this is different from the MSD!) and
9
+ a task name (which you can freely choose): Task005_Prostate has 'Prostate' as task name and the task id is 5. Tasks are stored in the
10
+ nnUNet_raw_data_base/nnUNet_raw_data folder like this:
11
+
12
+ nnUNet_raw_data_base/nnUNet_raw_data/
13
+ ├── Task001_BrainTumour
14
+ ├── Task002_Heart
15
+ ├── Task003_Liver
16
+ ├── Task004_Hippocampus
17
+ ├── Task005_Prostate
18
+ ├── ...
19
+
20
+ Within each task folder, the following structure is expected:
21
+
22
+ Task001_BrainTumour/
23
+ ├── dataset.json
24
+ ├── imagesTr
25
+ ├── (imagesTs)
26
+ └── labelsTr
27
+
28
+ **Please make your custom task ids start at 500 to ensure that there will be no conflicts with downloaded pretrained models!!! (IDs cannot exceed 999)**
29
+
30
+ imagesTr contains the images belonging to the training cases. nnU-Net will run pipeline configuration, training with
31
+ cross-validation, as well as finding postprocessing and the best ensemble on this data. imagesTs (optional) contains the
32
+ images that belong to the
33
+ test cases, labelsTr the images with the ground truth segmentation maps for the training cases. dataset.json contains
34
+ metadata of the dataset.
35
+
36
+ Each training case is associated with an identifier = a unique name for that case. This identifier is used by nnU-Net to
37
+ recognize which label file belongs to which image. **All images (including labels) must be 3D nifti files (.nii.gz)!**
38
+
39
+ The image files can have any scalar pixel type. The label files must contain segmentation maps that contain consecutive integers,
40
+ starting with 0: 0, 1, 2, 3, ... num_labels. 0 is considered background. Each class then has its own associated integer
41
+ value.
42
+ Images may have multiple modalities. This is especially often the case for medical images. Modalities are very much
43
+ like color channels in photos (three color channels: red, green, blue), but can be much more diverse: CT, different types
44
+ of MRI, and many others. Imaging modalities are identified by nnU-Net by their suffix: a four-digit integer at the end
45
+ of the filename. Imaging files must therefore follow the following naming convention: case_identifier_XXXX.nii.gz.
46
+ Hereby, XXXX is the modality identifier. What modalities these identifiers belong to is specified in the dataset.json
47
+ file (see below). Label files are saved as case_identifier.nii.gz
48
+
49
+ This naming scheme results in the following folder structure. It is the responsibility of the user to bring their
50
+ data into this format!
51
+
52
+ Here is an example for the first Task of the MSD: BrainTumour. Each image has four modalities: FLAIR (0000),
53
+ T1w (0001), T1gd (0002) and T2w (0003). Note that the imagesTs folder is optional and does not have to be present.
54
+
55
+ nnUNet_raw_data_base/nnUNet_raw_data/Task001_BrainTumour/
56
+ ├── dataset.json
57
+ ├── imagesTr
58
+ │   ├── BRATS_001_0000.nii.gz
59
+ │   ├── BRATS_001_0001.nii.gz
60
+ │   ├── BRATS_001_0002.nii.gz
61
+ │   ├── BRATS_001_0003.nii.gz
62
+ │   ├── BRATS_002_0000.nii.gz
63
+ │   ├── BRATS_002_0001.nii.gz
64
+ │   ├── BRATS_002_0002.nii.gz
65
+ │   ├── BRATS_002_0003.nii.gz
66
+ │   ├── BRATS_003_0000.nii.gz
67
+ │   ├── BRATS_003_0001.nii.gz
68
+ │   ├── BRATS_003_0002.nii.gz
69
+ │   ├── BRATS_003_0003.nii.gz
70
+ │   ├── BRATS_004_0000.nii.gz
71
+ │   ├── BRATS_004_0001.nii.gz
72
+ │   ├── BRATS_004_0002.nii.gz
73
+ │   ├── BRATS_004_0003.nii.gz
74
+ │   ├── ...
75
+ ├── imagesTs
76
+ │   ├── BRATS_485_0000.nii.gz
77
+ │   ├── BRATS_485_0001.nii.gz
78
+ │   ├── BRATS_485_0002.nii.gz
79
+ │   ├── BRATS_485_0003.nii.gz
80
+ │   ├── BRATS_486_0000.nii.gz
81
+ │   ├── BRATS_486_0001.nii.gz
82
+ │   ├── BRATS_486_0002.nii.gz
83
+ │   ├── BRATS_486_0003.nii.gz
84
+ │   ├── BRATS_487_0000.nii.gz
85
+ │   ├── BRATS_487_0001.nii.gz
86
+ │   ├── BRATS_487_0002.nii.gz
87
+ │   ├── BRATS_487_0003.nii.gz
88
+ │   ├── BRATS_488_0000.nii.gz
89
+ │   ├── BRATS_488_0001.nii.gz
90
+ │   ├── BRATS_488_0002.nii.gz
91
+ │   ├── BRATS_488_0003.nii.gz
92
+ │   ├── BRATS_489_0000.nii.gz
93
+ │   ├── BRATS_489_0001.nii.gz
94
+ │   ├── BRATS_489_0002.nii.gz
95
+ │   ├── BRATS_489_0003.nii.gz
96
+ │   ├── ...
97
+ └── labelsTr
98
+ ├── BRATS_001.nii.gz
99
+ ├── BRATS_002.nii.gz
100
+ ├── BRATS_003.nii.gz
101
+ ├── BRATS_004.nii.gz
102
+ ├── ...
103
+
104
+ Here is another example of the second task of the MSD, which has only one modality:
105
+
106
+ nnUNet_raw_data_base/nnUNet_raw_data/Task002_Heart/
107
+ ├── dataset.json
108
+ ├── imagesTr
109
+ │   ├── la_003_0000.nii.gz
110
+ │   ├── la_004_0000.nii.gz
111
+ │   ├── ...
112
+ ├── imagesTs
113
+ │   ├── la_001_0000.nii.gz
114
+ │   ├── la_002_0000.nii.gz
115
+ │   ├── ...
116
+ └── labelsTr
117
+ ├── la_003.nii.gz
118
+ ├── la_004.nii.gz
119
+ ├── ...
120
+
121
+ For each training case, all images must have the same geometry to ensure that their pixel arrays are aligned. Also
122
+ make sure that all your data is co-registered!
123
+
124
+ The dataset.json file used by nnU-Net is identical to the ones used by the MSD. For your custom tasks you need to create
125
+ them as well and thereby exactly follow the same structure. [This](https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2)
126
+ is where you can download the MSD data for reference.
127
+
128
+ **NEW:** There now is a utility with which you can generate the dataset.json automatically. You can find it
129
+ [here](../nnunet/dataset_conversion/utils.py) (look for the function `generate_dataset_json`).
130
+ See [Task120](../nnunet/dataset_conversion/Task120_Massachusetts_RoadSegm.py) for an example on how to use it. And read
131
+ its documentation!
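+ 
+ As a rough sketch (paths and values are placeholders; check the function's docstring for the authoritative signature):
+ 
+ ```python
+ from nnunet.dataset_conversion.utils import generate_dataset_json
+ 
+ generate_dataset_json('/path/to/Task555_MyTask/dataset.json',   # output file
+                       '/path/to/Task555_MyTask/imagesTr',       # imagesTr directory
+                       None,                                     # imagesTs directory (None if absent)
+                       ('T2', 'ADC'),                            # modalities, one entry per _XXXX suffix
+                       {0: 'background', 1: 'PZ', 2: 'TZ'},      # labels
+                       'Task555_MyTask')                         # dataset name
+ ```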
132
+
133
+ Here is the content of the dataset.json from the Prostate task:
134
+
135
+ {
136
+ "name": "PROSTATE",
137
+ "description": "Prostate transitional zone and peripheral zone segmentation",
138
+ "reference": "Radboud University, Nijmegen Medical Centre",
139
+ "licence":"CC-BY-SA 4.0",
140
+ "relase":"1.0 04/05/2018",
141
+ "tensorImageSize": "4D",
142
+ "modality": {
143
+ "0": "T2",
144
+ "1": "ADC"
145
+ },
146
+ "labels": {
147
+ "0": "background",
148
+ "1": "PZ",
149
+ "2": "TZ"
150
+ },
151
+ "numTraining": 32,
152
+ "numTest": 16,
153
+ "training":[{"image":"./imagesTr/prostate_16.nii.gz","label":"./labelsTr/prostate_16.nii.gz"},{"image":"./imagesTr/prostate_04.nii.gz","label":"./labelsTr/prostate_04.nii.gz"},...],
154
+ "test": ["./imagesTs/prostate_08.nii.gz","./imagesTs/prostate_22.nii.gz","./imagesTs/prostate_30.nii.gz",...]
155
+ }
156
+
157
+ Note that we truncated the "training" and "test" lists for clarity. You need to specify all the cases in there. If you
158
+ don't have test images (imagesTs does not exist) you can leave "test" blank: `"test": []`.
159
+
160
+ Please also have a look at the python files located [here](../nnunet/dataset_conversion). They show how we created our
161
+ custom dataset.jsons for a range of public datasets.
162
+
163
+ ## How to use decathlon datasets
164
+ The previous release of nnU-Net allowed users to either start with 4D or 3D niftis. This resulted in some confusion,
165
+ however, because some users would not know where they should save their data. We therefore dropped support for the 4D
166
+ niftis used by the MSD. Instead, we provide a utility that converts the MSD datasets into the format specified above:
167
+
168
+ ```bash
169
+ nnUNet_convert_decathlon_task -i FOLDER_TO_TASK_AS_DOWNLOADED_FROM_MSD -p NUM_PROCESSES
170
+ ```
171
+
172
+ FOLDER_TO_TASK_AS_DOWNLOADED_FROM_MSD needs to point to the downloaded task folder (such as Task05_Prostate, note the
173
+ 2-digit task id!). The converted Task will be saved under the same name in nnUNet_raw_data_base/nnUNet_raw_data
174
+ (but with a 3 digit identifier). You can overwrite the task id of the converted task by using the `-output_task_id` option.
175
+
176
+
177
+ ## How to use 2D data with nnU-Net
178
+ nnU-Net was originally built for 3D images. It is also strongest when applied to 3D segmentation problems because a
179
+ large proportion of its design choices were built with 3D in mind. Also note that many 2D segmentation problems,
180
+ especially in the non-biomedical domain, may benefit from pretrained network architectures which nnU-Net does not
181
+ support.
182
+ Still, there is certainly a need for an out of the box segmentation solution for 2D segmentation problems. And
183
+ also on 2D segmentation tasks nnU-Net cam perform extremely well! We have, for example, won a 2D task in the cell
184
+ tracking challenge with nnU-Net (see our Nature Methods paper) and we have also successfully applied nnU-Net to
185
+ histopathological segmentation problems.
186
+ Working with 2D data in nnU-Net requires a small workaround in the creation of the dataset. Essentially, all images
187
+ must be converted to pseudo 3D images (so an image with shape (X, Y) needs to be converted to an image with shape
188
+ (1, X, Y). The resulting image must be saved in nifti format. Hereby it is important to set the spacing of the
189
+ first axis (the one with shape 1) to a value larger than the others. If you are working with niftis anyways, then
190
+ doing this should be easy for you. This example here is intended for demonstrating how nnU-Net can be used with
191
+ 'regular' 2D images. We selected the massachusetts road segmentation dataset for this because it can be obtained
192
+ easily, it comes with a good amount of training cases but is still not too large to be difficult to handle.
193
+
194
+ See [here](../nnunet/dataset_conversion/Task120_Massachusetts_RoadSegm.py) for an example.
195
+ This script contains a lot of comments and useful information. Also have a look
196
+ [here](../nnunet/dataset_conversion/Task089_Fluo-N2DH-SIM.py).
197
+
198
+ ## How to update an existing dataset
199
+ When updating a dataset you not only need to change the data located in `nnUNet_raw_data_base/nnUNet_raw_data`. Make
200
+ sure to also delete the whole (!) corresponding dataset in `nnUNet_raw_data_base/nnUNet_cropped_data`. nnU-Net will not
201
+ repeat the cropping (and thus will not update your dataset) if the old files are still in nnUNet_cropped_data!
202
+
203
+ The best way of updating an existing dataset is (**choose one**):
204
+ - delete all data and models belonging to the old version of the dataset (nnUNet_preprocessed, corresponding results
205
+ in RESULTS_FOLDER/nnUNet, nnUNet_cropped_data, nnUNet_raw_data), then update
206
+ - (recommended) create the updated dataset from scratch using a new task ID **and** name
207
+
208
+
209
+ ## How to convert other image formats to nifti
210
+ Please have a look at the following tasks:
211
+ - [Task120](../nnunet/dataset_conversion/Task120_Massachusetts_RoadSegm.py): 2D png images
212
+ - [Task075](../nnunet/dataset_conversion/Task075_Fluo_C3DH_A549_ManAndSim.py) and [Task076](../nnunet/dataset_conversion/Task076_Fluo_N3DH_SIM.py): 3D tiff
213
+ - [Task089](../nnunet/dataset_conversion/Task089_Fluo-N2DH-SIM.py) 2D tiff
documentation/expected_epoch_times.md ADDED
@@ -0,0 +1,173 @@
1
+ # Introduction
2
+ Trainings can take some time. A well-running training setup is essential to get the most of nnU-Net. nnU-Net does not
3
+ require any fancy hardware, just a well-balanced system. We recommend at least 32 GB of RAM, 6 CPU cores (12 threads),
4
+ SSD storage (this can be SATA and does not have to be PCIe. DO NOT use an external SSD connected via USB!) and a
5
+ 2080 ti GPU. If your system has multiple GPUs, the
6
+ other components need to scale linearly with the number of GPUs.
7
+
8
+ # Benchmark Details
9
+ To ensure your system is running as intended, we provide some benchmark numbers against which you can compare. Here
10
+ are the details about benchmarking:
11
+
12
+ - We benchmark **2d**, **3d_fullres** and a modified 3d_fullres that uses 3x the default batch size (called **3d_fullres large** here)
13
+ - The datasets **Task002_Heart**, **Task005_Prostate** and **Task003_Liver** of the Medical Segmentation Decathlon are used
14
+ (they provide a good spectrum of dataset properties)
15
+ - we use the nnUNetTrainerV2_5epochs trainer. This will run only for 5 epochs and it will skip validation.
16
+ From the 5 epochs, we select the fastest one as the epoch time.
17
+ - We will also be running the nnUNetTrainerV2_5epochs_dummyLoad trainer on the 3d_fullres config (called **3d_fullres dummy**). This trainer does not use
18
+ the dataloader and instead uses random dummy inputs, bypassing all data augmentation (CPU) and I/O bottlenecks.
19
+ - All trainings are done with mixed precision. This is why Pascal GPUs (Titan Xp) are so slow (they do not have
20
+ tensor cores)
21
+
22
+ # How to run the benchmark
23
+ First go into the folder where the preprocessed data and plans file of the task you would like to use are located. For me this is
24
+ `/home/fabian/data/nnUNet_preprocessed/Task002_Heart`
25
+
26
+ Then run the following python snippet. This will create our custom **3d_fullres_large** configuration. Note that this
27
+ large configuration will only run on GPUs with 16GB or more! We included it in the test because some GPUs
28
+ (V100, and probably also A100) can shine when they get more work to do per iteration.
29
+ ```python
30
+ from batchgenerators.utilities.file_and_folder_operations import *
31
+ plans = load_pickle('nnUNetPlansv2.1_plans_3D.pkl')
32
+ stage = max(plans['plans_per_stage'].keys())
33
+ plans['plans_per_stage'][stage]['batch_size'] *= 3
34
+ save_pickle(plans, 'nnUNetPlansv2.1_bs3x_plans_3D.pkl')
35
+ ```
36
+
37
+ Now you can run the benchmarks. Each should only take a couple of minutes
38
+ ```bash
39
+ nnUNet_train 2d nnUNetTrainerV2_5epochs TASKID 0
40
+ nnUNet_train 3d_fullres nnUNetTrainerV2_5epochs TASKID 0
41
+ nnUNet_train 3d_fullres nnUNetTrainerV2_5epochs_dummyLoad TASKID 0
42
+ nnUNet_train 3d_fullres nnUNetTrainerV2_5epochs TASKID 0 -p nnUNetPlansv2.1_bs3x # optional, only for GPUs with more than 16GB of VRAM
43
+ ```
44
+
45
+ The time we are interested in is the epoch time. You can find it in the text output (stdout) or the log file
46
+ located in your `RESULTS_FOLDER`. Note that the trainers used here run for 5 epochs. Select the fastest time from your
47
+ output as your benchmark time.
48
+
49
+ # Results
50
+
51
+ The following table shows the results we are getting on our servers/workstations. We are using pytorch 1.7.1 that we
52
+ compiled ourselves using the instructions found [here](https://github.com/pytorch/pytorch#from-source). The cuDNN
53
+ version we used is 8.1.0.77. You should be seeing similar numbers when you
54
+ run the benchmark on your server/workstation. Note that fluctuations of a couple of seconds are normal!
55
+
56
+ IMPORTANT: Compiling pytorch from source is currently mandatory for best performance! Pytorch 1.8 does not have
57
+ working tensorcore acceleration for 3D convolutions when installed with pip or conda!
58
+
59
+ IMPORTANT: A100 and V100 are very fast with the newer cuDNN versions and need more CPU workers to prevent bottlenecks,
60
+ set the environment variable `nnUNet_n_proc_DA=XX`
61
+ to increase the number of data augmentation workers. Recommended: 20 for V100, 32 for A100. Datasets with many input
62
+ modalities (BraTS: 4) require A LOT of CPU and should be used with even larger values for `nnUNet_n_proc_DA`
63
+
64
+ ## Pytorch 1.7.1 compiled with cuDNN 8.1.0.77
65
+
66
+ | | A100 40GB (DGX A100) 400W | V100 32GB SXM3 (DGX2) 350W | V100 32GB PCIe 250W | Quadro RTX6000 24GB 260W | Titan RTX 24GB 280W | RTX 2080 ti 11GB 250W | Titan Xp 12GB 250W |
67
+ |-----------------------------------|---------------------------|----------------------------|---------------------|--------------------------|---------------------|-----------------------|--------------------|
68
+ | Task002_Heart 2d | 40.06 | 66.03 | 76.19 | 78.01 | 79.78 | 98.49 | 177.87 |
69
+ | Task002_Heart 3d_fullres | 51.17 | 85.96 | 99.29 | 110.47 | 112.34 | 148.36 | 504.93 |
70
+ | Task002_Heart 3d_fullres dummy | 48.53 | 79 | 89.66 | 105.16 | 105.56 | 138.4 | 501.64 |
71
+ | Task002_Heart 3d_fullres large | 118.5 | 220.45 | 251.25 | 322.28 | 300.96 | OOM | OOM |
72
+ | | | | | | | | |
73
+ | Task003_Liver 2d | 39.71 | 60.69 | 69.65 | 72.29 | 76.17 | 92.54 | 183.73 |
74
+ | Task003_Liver 3d_fullres | 44.48 | 75.53 | 87.19 | 85.18 | 86.17 | 106.76 | 290.87 |
75
+ | Task003_Liver 3d_fullres dummy | 41.1 | 70.96 | 80.1 | 79.43 | 79.43 | 101.54 | 289.03 |
76
+ | Task003_Liver 3d_fullres large | 115.33 | 213.27 | 250.09 | 261.54 | 266.66 | OOM | OOM |
77
+ | | | | | | | | |
78
+ | Task005_Prostate 2d | 42.21 | 68.88 | 80.46 | 83.62 | 81.59 | 102.81 | 183.68 |
79
+ | Task005_Prostate 3d_fullres | 47.19 | 76.33 | 85.4 | 100 | 102.05 | 132.82 | 415.45 |
80
+ | Task005_Prostate 3d_fullres dummy | 43.87 | 70.58 | 81.32 | 97.48 | 98.99 | 124.73 | 410.12 |
81
+ | Task005_Prostate 3d_fullres large | 117.31 | 209.12 | 234.28 | 277.14 | 284.35 | OOM | OOM |
82
+
83
+ # Troubleshooting
84
+ Your epoch times are substantially slower than ours? That's not good! This section will help you figure out what is
85
+ wrong. Note that each system is unique and we cannot help you find bottlenecks beyond providing the information
86
+ presented in this section!
87
+
88
+ ## First step: Make sure you have the right software!
89
+ In order to get maximum performance, you need to have pytorch compiled with a recent cuDNN version (8002 or newer is a must!).
90
+ Unfortunately the currently provided pip/conda installable pytorch versions have a bug which causes their performance
91
+ to be very low (see https://github.com/pytorch/pytorch/issues/57115 and https://github.com/pytorch/pytorch/issues/50153).
92
+ They are about 2x-3x slower than the numbers we report in the table above.
93
+ You need to have a pytorch version that was compiled from source to get maximum performance as shown in the table above.
94
+ The easiest way to get that is by using the [Nvidia pytorch Docker](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch).
95
+ If you cannot use docker, you will need to compile pytorch
96
+ yourself. For that, first download and install cuDNN from the [Nvidia homepage](https://developer.nvidia.com/cudnn), then follow the
97
+ [instructions on the pytorch website](https://github.com/pytorch/pytorch#from-source) on how to compile it.
98
+
99
+ If you compiled pytorch yourself, you can check for the correct cuDNN version by running:
100
+ ```bash
101
+ python -c 'import torch;print(torch.backends.cudnn.version())'
102
+ ```
103
+ If the output is `8002` or higher, then you are good to go. If not, you may have to take action. IMPORTANT: this
104
+ only applies to pytorch that was compiled from source. pip/conda installed pytorch will report a new cuDNN version
105
+ but still have poor performance due to the bug linked above.
106
+
107
+ ## Identifying the bottleneck
108
+ If the software is up to date and you are still experiencing problems, this is how you can figure out what is going on:
109
+
110
+ While a training is running, run `htop` and `watch -n 0.1 nvidia-smi` (depending on your region you may have to use
111
+ `0,1` instead). If you have physical access to the machine, also have a look at the LED indicating I/O activity.
112
+
113
+ Here is what you can read from that:
114
+ - `nvidia-smi` shows the GPU activity. `watch -n 0.1` makes this command refresh every 0.1s. This will allow you to
115
+ see your GPU in action. A well running training will have your GPU pegged at 90-100% with no drops in GPU utilization.
116
+ Your power should also be close to the maximum (for example `237W / 250 W`) at all times.
117
+ - `htop` gives you an overview of the CPU usage. nnU-Net uses 12 processes for data augmentation + one main process.
118
+ This means that up to 13 processes should be running simultaneously.
119
+ - the I/O LED indicates that your system is reading/writing data from/to your hard drive/SSD. Whenever this is
120
+ blinking your system is doing something with your HDD/SSD.
121
+
122
+ ### GPU bottleneck
123
+ If `nvidia-smi` is constantly showing 90-100% GPU utilization and the reported power draw is near the maximum, your
124
+ GPU is the bottleneck. This is great! That means that your other components are not slowing it down. Your epoch times
125
+ should be the same as ours reported above. If they are not, then you need to investigate your software stack (see cuDNN stuff above).
126
+
127
+ What can you do about it?
128
+ 1) There is nothing holding you back. Everything is fine!
129
+ 2) If you need faster training, consider upgrading your GPU. Performance numbers are above, feel free to use them for guidance.
130
+ 3) Think about whether you need more (slower) GPUs or fewer (faster) GPUs. Make sure to include server/workstation
131
+ costs into your calculations. Sometimes it is better to go with more (cheaper but slower) GPUs and run multiple trainings
132
+ in parallel.
133
+
134
+ ### CPU bottleneck
135
+ You can recognize a CPU bottleneck as follows:
136
+ 1) htop is consistently showing 10+ processes that are associated with your nnU-Net training
137
+ 2) nvidia-smi is reporting jumps of GPU activity with zeroes in between
138
+
139
+ What can you do about it?
140
+ 1) Depending on your single core performance, some datasets may require more than the default 12 processes for data
141
+ augmentation. The CPU requirements for DA increase roughly linearly with the number of input modalities. Most datasets
142
+ will train fine with far fewer than 12 (6 or even just 4). But datasets with, for example, 4 modalities may require more.
143
+ If you have more than 12 CPU threads available, set the environment variable `nnUNet_n_proc_DA` to a number higher than 12.
144
+ 2) If your CPU has less than 12 threads in total, running 12 threads can overburden it. Try lowering `nnUNet_n_proc_DA`
145
+ to the number of threads you have available.
146
+ 3) (sounds stupid, but this is the only other way) upgrade your CPU. I have seen servers with 8 CPU cores (16 threads)
147
+ and 8 GPUs in them. That is not well balanced. CPUs are cheap compared to GPUs. On a 'workstation' (single or dual GPU)
148
+ you can get something like a Ryzen 3900X or 3950X. On a server you could consider Xeon 6226R or 6258R on the Intel
149
+ side or the EPYC 7302P, 7402P, 7502P or 7702P on the AMD side. Make sure to scale the number of cores according to your
150
+ number of GPUs and use case. Feel free to also use our nnU-Net recommendations from above.
151
+
152
+ ### I/O bottleneck
153
+ On a workstation, I/O bottlenecks can be identified by looking at the LED indicating I/O activity. This is what an
154
+ I/O bottleneck looks like:
155
+ - nvidia-smi is reporting jumps of GPU activity with zeroes in between
156
+ - htop is not showing many active CPU processes
157
+ - I/O LED is blinking rapidly or turned on constantly
158
+
159
+ Detecting I/O bottlenecks is difficult on servers where you may not have physical access. Tools like `iotop` are
160
+ difficult to read and can only be run with sudo. However, an I/O LED is not strictly necessary for the diagnosis. If
161
+ - nvidia-smi is reporting jumps of GPU activity with zeroes in between
162
+ - htop is not showing many active CPU processes
163
+
164
+ then the only possible issue to my knowledge is in fact an I/O bottleneck.
165
+
166
+ Here is what you can do about an I/O bottleneck:
167
+ 1) Make sure you are actually using an SSD to store the preprocessed data (`nnUNet_preprocessed`). Do not use an
168
+ SSD connected via USB! Never use an HDD. Do not use a network drive that was not specifically designed to handle fast I/O
169
+ (Note that you can use a network drive if it was designed for this purpose. At the DKFZ we use a
170
+ [flashblade](https://www.purestorage.com/products/file-and-object/flashblade.html) connected via ethernet and that works
171
+ great)
172
+ 2) A SATA SSD is only enough to feed 1-2 GPUs. If you have more GPUs installed, you may have to upgrade to an NVMe
173
+ drive (make sure to get PCIe interface!).
documentation/extending_nnunet.md ADDED
@@ -0,0 +1,119 @@
1
+
2
+ # Extending/Changing nnU-Net
3
+
4
+ To use nnU-Net as a framework and make changes to its components, please make sure to install it with the `git clone`
5
+ and `pip install -e .` commands so that a local copy of the code is created.
6
+ Changing components of nnU-Net needs to be done in different places, depending on whether these components belong to
7
+ the inferred, blueprint or empirical parameters. We cover some of the most common use cases below. They should give
8
+ you a good indication of where to start.
9
+
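+ For reference, a typical editable install (assuming you are cloning the official nnU-Net GitHub repository) looks like this:
+
+ ```bash
+ git clone https://github.com/MIC-DKFZ/nnUNet.git
+ cd nnUNet
+ pip install -e .
+ ```
+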
10
+ Generally it is recommended to look into the code where the thing you would like to change is currently implemented
11
+ and then derive a strategy on how to change it. If you have any questions, feel free to open an issue on GitHub and
12
+ we will help you as much as we can.
13
+
14
+ ## Changes to blueprint parameters
15
+ This section gives guidance on how to implement changes to loss function, training schedule, learning rates, optimizer,
16
+ some architecture parameters, data augmentation etc. All these parameters are part of the **nnU-Net trainer class**,
17
+ which we have already seen in the sections above. The default trainer class for 2D, 3D low resolution and 3D full
18
+ resolution U-Net is nnUNetTrainerV2, the default for the 3D full resolution U-Net from the cascade is
19
+ nnUNetTrainerV2CascadeFullRes. Trainer classes in nnU-Net inherit from each other; nnUNetTrainerV2CascadeFullRes for
20
+ example has nnUNetTrainerV2 as parent class and only overrides cascade-specific code.
21
+
22
+ Due to the inheritance of trainer classes, changes can be integrated into nnU-Net quite easily and with minimal effort.
23
+ Simply create a new trainer class (with some custom name), change the functionality you need to change and then specify
24
+ this class (via its name) during training - done.
25
+
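+ As a minimal sketch (the class name, file location and learning rate value are made up for illustration; `initial_lr` is an existing attribute of nnUNetTrainerV2):
+
+ ```python
+ # saved e.g. as nnunet/training/network_training/my_trainers/nnUNetTrainerV2_lr1en3.py
+ from nnunet.training.network_training.nnUNetTrainerV2 import nnUNetTrainerV2
+
+
+ class nnUNetTrainerV2_lr1en3(nnUNetTrainerV2):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.initial_lr = 1e-3  # override the default initial learning rate
+ ```
+
+ You would then run `nnUNet_train 3d_fullres nnUNetTrainerV2_lr1en3 TASK FOLD` to train with it.
+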
26
+ This process requires the new class to be located in a subfolder of nnunet.training.network_training! Do not save it
27
+ somewhere else or nnU-Net will not be able to find it! Also don't use the same name twice! nnU-Net always picks the
28
+ first trainer that matches the requested name.
29
+
30
+ Don't worry about overwriting results of another trainer class. nnU-Net always generates output folders that are named
31
+ after the trainer class used to generate the results.
32
+
33
+ Due to the variety of possible changes to the blueprint parameters of nnU-Net, we here only present a summary of where
34
+ to look for what kind of modification. During method development we have already created a large number of nnU-Net
35
+ blueprint variations which should give a good indication of where to start:
36
+
37
+ | Type of modification | Examples |
38
+ |-------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
39
+ | loss function | nnunet.training.network_training.loss_function.* |
40
+ | data augmentation | nnunet.training.network_training.data_augmentation.* |
41
+ | Optimizer, lr, momentum | nnunet.training.network_training.optimizer_and_lr.* |
42
+ | (Batch)Normalization | nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_BN.py<br>nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_FRN.py<br>nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_GN.py<br>nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_NoNormalization_lr1en3.py |
43
+ | Nonlinearity | nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_ReLU.py<br>nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_Mish.py |
44
+ | Architecture | nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_3ConvPerStage.py<br>nnunet.training.network_training.architectural_variants.nnUNetTrainerV2_ResencUNet |
45
+ | ... | (see nnunet.training.network_training and subfolders) |
46
+
47
+ ## Changes to Inferred Parameters
48
+ The inferred parameters are determined based on the dataset fingerprint, a low dimensional representation of the properties
49
+ of the training cases. It captures, for example, the image shapes, voxel spacings and intensity information from
50
+ the training cases. The dataset fingerprint is created by the DatasetAnalyzer (which is located in nnunet.preprocessing)
51
+ while running `nnUNet_plan_and_preprocess`.
52
+
53
+ `nnUNet_plan_and_preprocess` uses so called ExperimentPlanners for running the adaptation process. Default ExperimentPlanner
54
+ classes are ExperimentPlanner2D_v21 for the 2D U-Net and ExperimentPlanner3D_v21 for the 3D full resolution U-Net and the
55
+ U-Net cascade. Just like nnUNetTrainers, the ExperimentPlanners inherit from each other, resulting in minimal programming
56
+ effort to incorporate changes. Just like with the trainers, simply give your custom ExperimentPlanners a unique name and
57
+ save them in some subfolder of nnunet.experiment_planning. You can then specify your class names when running
58
+ `nnUNet_plan_and_preprocess` and nnU-Net will find them automatically. When inheriting from ExperimentPlanners, you **MUST**
59
+ overwrite the class variables `self.data_identifier` and `self.plans_fname` (just like for example
60
+ [here](../nnunet/experiment_planning/alternative_experiment_planning/normalization/experiment_planner_3DUNet_CT2.py)).
61
+ If you omit this step the planner will overwrite the plans file and the preprocessed data of the planner it inherits from.
62
+
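+ A minimal sketch (the class name, identifier and file name are made up; parent class and constructor signature follow nnU-Net):
+
+ ```python
+ # saved e.g. as nnunet/experiment_planning/my_planners/experiment_planner_3DUNet_custom.py
+ from batchgenerators.utilities.file_and_folder_operations import join
+ from nnunet.experiment_planning.experiment_planner_baseline_3DUNet_v21 import ExperimentPlanner3D_v21
+
+
+ class ExperimentPlanner3D_v21_custom(ExperimentPlanner3D_v21):
+     def __init__(self, folder_with_cropped_data, preprocessed_output_folder):
+         super().__init__(folder_with_cropped_data, preprocessed_output_folder)
+         # unique identifiers so we do not overwrite the plans and preprocessed data of the parent
+         self.data_identifier = "nnUNetData_custom"
+         self.plans_fname = join(self.preprocessed_output_folder, "nnUNetPlans_custom_plans_3D.pkl")
+ ```
+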
63
+ To train with your custom configuration, simply specify the correct plans identifier with `-p` when you call the
64
+ `nnUNet_train` command. The plans file also contains the data_identifier specified in your ExperimentPlanner, so the
65
+ trainer class will automatically know what data should be used.
66
+
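+ For the sketch above, this would be (the plans identifier is the plans file name without the `_plans_3D.pkl` suffix):
+
+ ```bash
+ nnUNet_train 3d_fullres nnUNetTrainerV2 TASK FOLD -p nnUNetPlans_custom
+ ```
+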
67
+ Possible adaptations to the inferred parameters could include a different way of prioritizing batch size vs patch size
68
+ (currently, nnU-Net prioritizes patch size), a different handling of the spacing information for architecture template
69
+ instantiation, changing the definition of target spacing, or using different strategies for finding the 3d low
70
+ resolution U-Net configuration.
71
+
72
+ The folders located in nnunet.experiment_planning contain several example ExperimentPlanners that modify various aspects
73
+ of the inferred parameters. You can use them as inspiration for your own.
74
+
75
+ If you wish to run a different preprocessing, you most likely will have to implement your own Preprocessor class.
76
+ The preprocessor class that is used by some ExperimentPlanner is specified in its preprocessor_name class variable. The
77
+ default is `self.preprocessor_name = "GenericPreprocessor"` for 3D and `PreprocessorFor2D` for 2D (the 2D preprocessor
78
+ ignores the target spacing for the first axis to ensure that images are only resampled in the axes that will make up the training samples).
79
+ GenericPreprocessor (and all custom Preprocessors you implement) must be located in nnunet.preprocessing. The
80
+ preprocessor_name is saved in the plans file (by ExperimentPlanner), so that the
81
+ nnUNetTrainer knows which preprocessor must be used during inference to match the preprocessing of the training data.
82
+
83
+ Modifications to the preprocessing pipeline could be the addition of bias field correction to MRI images, a different CT
84
+ preprocessing scheme or a different way of resampling segmentations and image data for anisotropic cases.
85
+ An example is provided [here](../nnunet/preprocessing/preprocessing.py).
86
+
87
+ When implementing a custom preprocessor, you should also create a custom ExperimentPlanner that uses it (via self.preprocessor_name).
88
+ This experiment planner must also use a matching data_identifier and plans_fname to ensure no other data is overwritten.
89
+
90
+ ## Use a different network architecture
91
+ Changing the network architecture in nnU-Net is easy, but not self-explanatory. Any new segmentation network you implement
92
+ needs to understand what nnU-Net requests from it (wrt how many downsampling operations are done, whether deep supervision
93
+ is used, what the convolutional kernel sizes are supposed to be). It needs to be able to dynamiccaly change its topology,
94
+ just like our implementation of the [Generic_UNet](../nnunet/network_architecture/generic_UNet.py). Furthermore, it must be
95
+ able to generate a value that can be used to estimate memory consumption. What we have implemented for Generic_UNet effectively
96
+ counts the number of voxels found in all feature maps that are present in a given configuration. Although this estimation
97
+ disregards the number of parameters, we have found it to work quite well. Unless you implement an architecture with an
98
+ unreasonably high number of parameters, the large majority of the VRAM used during training will be occupied by feature
99
+ maps, so parameters can be (mostly) disregarded. For implementing your own network, it is key to understand that the
100
+ number we are computing here cannot be interpreted directly as memory consumption (factors other than the feature maps
101
+ of the convolutions also play a role, such as instance normalization. This is furthermore very hard to predict because
102
+ there are also several different algorithms for running the convolutions, each with its own memory requirement. We train
103
+ models with cudnn.benchmark=True, so it is impossible to predict which algorithm is used).
104
+ So instead, to approach this problem in the most straightforward way, we manually identify the largest configuration we
105
+ can fit in the GPU of choice (manually define the downsampling, patch size etc.) and use this value (-10% or so to be safe)
106
+ as **reference** in the ExperimentPlanner that uses this architecture.
107
+
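+ As a toy illustration of this voxel-counting proxy (this is not nnU-Net's actual code; the function name and the simple doubling/halving scheme are assumptions for illustration):
+
+ ```python
+ import numpy as np
+
+ def approx_feature_map_voxels(patch_size, base_features=32, num_pool=5, max_features=320):
+     """Sum the voxel count over all encoder feature maps of a plain U-Net."""
+     total, shape, features = 0, np.array(patch_size, dtype=np.int64), base_features
+     for _ in range(num_pool + 1):
+         total += features * int(np.prod(shape))
+         shape = shape // 2                          # each pooling halves every axis
+         features = min(features * 2, max_features)  # feature maps double, capped
+     return total
+ ```
+
+ Comparing such a number for a candidate configuration against the manually determined reference is exactly what the ExperimentPlanner does.
+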
108
+ To illustrate this process, we have implemented a U-Net with a residual encoder
109
+ (see FabiansUNet in [generic_modular_residual_UNet.py](../nnunet/network_architecture/generic_modular_residual_UNet.py)).
110
+ This UNet has a class variable called use_this_for_3D_configuration. This value was found with the code located in
111
+ find_3d_configuration (same python file). The corresponding ExperimentPlanner
112
+ [ExperimentPlanner3DFabiansResUNet_v21](../nnunet/experiment_planning/alternative_experiment_planning/experiment_planner_residual_3DUNet_v21.py)
113
+ compares this value to values generated for the currently configured network topology (which are also computed by
114
+ FabiansUNet.compute_approx_vram_consumption) to ensure that the GPU memory target is met.
115
+
116
+ ## Tutorials
117
+ We have created tutorials on how to [manually edit plans files](tutorials/edit_plans_files.md),
118
+ [change the target spacing](tutorials/custom_spacing.md) and
119
+ [changing the normalization scheme for preprocessing](tutorials/custom_preprocessing.md).
documentation/inference_example_Prostate.md ADDED
@@ -0,0 +1,78 @@
1
+ # Example: inference with pretrained nnU-Net models
2
+
3
+ This is a step-by-step example of how to run inference with pretrained nnU-Net models on the Prostate dataset of the
4
+ Medical Segmentation Decathlon.
5
+
6
+ 1) Install nnU-Net by following the instructions [here](../readme.md#installation). Make sure to set all relevant paths,
7
+ also see [here](setting_up_paths.md). This step is necessary so that nnU-Net knows where to store trained models.
8
+ 2) Download the Prostate dataset of the Medical Segmentation Decathlon from
9
+ [here](https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2). Then extract the archive to a
10
+ destination of your choice.
11
+ 3) We selected the Prostate dataset for this example because we have a utility script that converts the test data into
12
+ the correct format.
13
+
14
+ Decathlon data come as 4D niftis. This is not compatible with nnU-Net (see dataset format specified
15
+ [here](dataset_conversion.md)). Convert the Prostate dataset into the correct format with
16
+
17
+ ```bash
18
+ nnUNet_convert_decathlon_task -i /xxx/Task05_Prostate
19
+ ```
20
+
21
+ Note that `Task05_Prostate` must be the folder that has the three 'imagesTr', 'labelsTr', 'imagesTs' subfolders!
22
+ The converted dataset can be found in `$nnUNet_raw_data_base/nnUNet_raw_data` ($nnUNet_raw_data_base is the folder for
23
+ raw data that you specified during installation)
24
+ 4) Download the pretrained model using this command:
25
+ ```bash
26
+ nnUNet_download_pretrained_model Task005_Prostate
27
+ ```
28
+ 5) The Prostate dataset requires two image modalities as input. This is very much like RGB images having three color channels.
29
+ nnU-Net recognizes modalities by the file ending: a single test case of the prostate dataset therefore consists of two files
30
+ `case_0000.nii.gz` and `case_0001.nii.gz`. Each of these files is a 3D image. The file ending with 0000.nii.gz must
31
+ always contain the T2 image and 0001.nii.gz the ADC image. Whenever you are using pretrained models, you can use
32
+ ```bash
33
+ nnUNet_print_pretrained_model_info Task005_Prostate
34
+ ```
35
+ to obtain information on which modality needs to get which number. The output for Prostate is the following:
36
+
37
+ Prostate Segmentation.
38
+ Segmentation targets are peripheral and central zone,
39
+ input modalities are 0: T2, 1: ADC.
40
+ Also see Medical Segmentation Decathlon, http://medicaldecathlon.com/
41
+ 6) The script we ran in 3) automatically converted the test data for us and stored them in
42
+ `$nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs`. Note that you need to do this conversion youself when
43
+ using other than Medcial Segmentation Decathlon datasets. No worries. Doing this is easy (often as simple as appending
44
+ a _0000 to the file name if only one input modality is required). Instructions can be found here [here](data_format_inference.md).
45
+ 7) You can now predict the Prostate test cases with the pretrained model. We exemplarily use the 3D full resoltion U-Net here:
46
+ ```bash
47
+ nnUNet_predict -i $nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs/ -o OUTPUT_DIRECTORY -t 5 -m 3d_fullres
48
+ ```
49
+ Note that `-t 5` specifies the task with id 5 (which corresponds to the Prostate dataset). You can also give the full
50
+ task name `Task005_Prostate`. `OUTPUT_DIRECTORY` is where the resulting segmentations are saved.
51
+
52
+ Predictions should be quite fast and you should be done within a couple of minutes. If you would like to speed it
53
+ up (at the expense of a slightly lower segmentation quality) you can disable test time data augmentation by
54
+ setting the `--disable_tta` flag (8x speedup). If this is still too slow for you, you can consider using only a
55
+ single model instead of the ensemble by specifying `-f 0`. This will use only the model trained on fold 0 of the
56
+ cross-validation for another 5x speedup.
57
+ 8) If you want to use an ensemble of different U-Net configurations for inference, you need to run the following commands:
58
+
59
+ Prediction with 3d full resolution U-Net (this command is a little different than the one above).
60
+ ```bash
61
+ nnUNet_predict -i $nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs/ -o OUTPUT_DIRECTORY_3D -t 5 --save_npz -m 3d_fullres
62
+ ```
63
+
64
+ Prediction with 2D U-Net
65
+ ```bash
66
+ nnUNet_predict -i $nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs/ -o OUTPUT_DIRECTORY_2D -t 5 --save_npz -m 2d
67
+ ```
68
+ `--save_npz` will tell nnU-Net to also store the softmax probabilities for ensembling.
69
+
70
+ You can then merge the predictions with
71
+ ```bash
72
+ nnUNet_ensemble -f OUTPUT_DIRECTORY_3D OUTPUT_DIRECTORY_2D -o OUTPUT_FOLDER_ENSEMBLE -pp POSTPROCESSING_FILE
73
+ ```
74
+ This will merge the predictions from `OUTPUT_DIRECTORY_2D` and `OUTPUT_DIRECTORY_3D`. `-pp POSTPROCESSING_FILE`
75
+ (optional!) is a file that gives nnU-Net information on how to postprocess the ensemble. These files were also
76
+ downloaded as part of the pretrained model weights and are located at `RESULTS_FOLDER/nnUNet/ensembles/
77
+ Task005_Prostate/ensemble_2d__nnUNetTrainerV2__nnUNetPlansv2.1--3d_fullres__nnUNetTrainerV2__nnUNetPlansv2.1/postprocessing.json`.
78
+ We will make the postprocessing files more accessible in a future (soon!) release.
documentation/setting_up_paths.md ADDED
@@ -0,0 +1,84 @@
1
+ # Setting up Paths
2
+
3
+ nnU-Net relies on environment variables to know where raw data, preprocessed data and trained model weights are stored.
4
+ To use the full functionality of nnU-Net, the following three environment variables must be set:
5
+
6
+ 1) nnUNet_raw_data_base: This is where nnU-Net finds the raw data and stores the cropped data. The folder located at
7
+ nnUNet_raw_data_base must have at least the subfolder nnUNet_raw_data, which in turn contains one subfolder for each Task.
8
+ It is the responsibility of the user to bring the raw data into the appropriate format - nnU-Net will then take care of
9
+ the rest ;-) For more information on the required raw data format, see [here](dataset_conversion.md).
10
+
11
+ Example tree structure:
12
+ ```
13
+ nnUNet_raw_data_base/nnUNet_raw_data/Task002_Heart
14
+ ├── dataset.json
15
+ ├── imagesTr
16
+ │   ├── la_003_0000.nii.gz
17
+ │   ├── la_004_0000.nii.gz
18
+ │   ├── ...
19
+ ├── imagesTs
20
+ │   ├── la_001_0000.nii.gz
21
+ │   ├── la_002_0000.nii.gz
22
+ │   ├── ...
23
+ └── labelsTr
24
+ ├── la_003.nii.gz
25
+ ├── la_004.nii.gz
26
+ ├── ...
27
+ nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/
28
+ ├── dataset.json
29
+ ├── imagesTr
30
+ │   ├── prostate_00_0000.nii.gz
31
+ │   ├── prostate_00_0001.nii.gz
32
+ │   ├── ...
33
+ ├── imagesTs
34
+ │   ├── prostate_03_0000.nii.gz
35
+ │   ├── prostate_03_0001.nii.gz
36
+ │   ├── ...
37
+ └── labelsTr
38
+ ├── prostate_00.nii.gz
39
+ ├── prostate_01.nii.gz
40
+ ├── ...
41
+ ```
42
+
43
+ 2) nnUNet_preprocessed: This is the folder where the preprocessed data will be saved. The data will also be read from
44
+ this folder during training. Therefore it is important that it is located on a drive with low access latency and high
45
+ throughput (a regular SATA or NVMe SSD is sufficient).
46
+
47
+ 3) RESULTS_FOLDER: This specifies where nnU-Net will save the model weights. If pretrained models are downloaded, this
48
+ is where it will save them.
49
+
50
+ ### How to set environment variables
51
+ (nnU-Net was developed for Ubuntu/Linux. The following guide is intended for this operating system and will not work on
52
+ others. We do not provide support for other operating systems!)
53
+
54
+ There are several ways you can do this. The most common one is to set the paths in your .bashrc file, which is located
55
+ in your home directory. For me, this file is located at /home/fabian/.bashrc. You can open it with any text editor of
56
+ choice. If you do not see the file, that may be because it is hidden by default. You can run `ls -al /home/fabian` to
57
+ ensure that you see it. In rare cases it may not be present and you can simply create it with `touch /home/fabian/.bashrc`.
58
+
59
+ Once the file is open in a text editor, add the following lines to the bottom:
60
+ ```
61
+ export nnUNet_raw_data_base="/media/fabian/nnUNet_raw"
62
+ export nnUNet_preprocessed="/media/fabian/nnUNet_preprocessed"
63
+ export RESULTS_FOLDER="/media/fabian/nnUNet_trained_models"
64
+ ```
65
+
66
+ (of course adapt the paths to your system and remember that nnUNet_preprocessed should be located on an SSD!)
67
+
68
+ Then save and exit. To be safe, make sure to reload the .bashrc by running `source /home/fabian/.bashrc`. Reloading
69
+ only needs to be done on terminal sessions that were already open before you saved the changes. Any new terminal you open
70
+ after will have these paths set. You can verify that the paths are set up properly by typing `echo $RESULTS_FOLDER`
71
+ etc and it should print out the correct folder.
72
+
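+ For example:
+
+ ```bash
+ source /home/fabian/.bashrc   # only needed in terminals that were already open
+ echo $nnUNet_raw_data_base
+ echo $nnUNet_preprocessed
+ echo $RESULTS_FOLDER
+ ```
+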
73
+ ### An alternative way of setting these paths
74
+ The method above sets the paths permanently (until you delete the lines from your .bashrc) on your system. If you wish
75
+ to set them only temporarily, you can run the export commands in your terminal:
76
+
77
+ ```
78
+ export nnUNet_raw_data_base="/media/fabian/nnUNet_raw"
79
+ export nnUNet_preprocessed="/media/fabian/nnUNet_preprocessed"
80
+ export RESULTS_FOLDER="/media/fabian/nnUNet_trained_models"
81
+ ```
82
+
83
+ This will set the paths for the current terminal session only (the variables will be lost if you close the terminal
84
+ and need to be reset every time).
documentation/training_example_Hippocampus.md ADDED
@@ -0,0 +1,40 @@
1
+ # Example: 3D U-Net training on the Hippocampus dataset
2
+
3
+ This is a step-by-step example on how to run a 3D full resolution Training with the Hippocampus dataset from the
4
+ Medical Segmentation Decathlon.
5
+
6
+ 1) Install nnU-Net by following the instructions [here](../readme.md#installation). Make sure to set all relevant paths,
7
+ also see [here](setting_up_paths.md). This step is necessary so that nnU-Net knows where to store raw data,
8
+ preprocessed data and trained models.
9
+ 2) Download the Hippocampus dataset of the Medical Segmentation Decathlon from
10
+ [here](https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2). Then extract the archive to a
11
+ destination of your choice.
12
+ 3) Decathlon data come as 4D niftis. This is not compatible with nnU-Net (see dataset format specified
13
+ [here](dataset_conversion.md)). Convert the Hippocampus dataset into the correct format with
14
+
15
+ ```bash
16
+ nnUNet_convert_decathlon_task -i /xxx/Task04_Hippocampus
17
+ ```
18
+
19
+ Note that `Task04_Hippocampus` must be the folder that has the three 'imagesTr', 'labelsTr', 'imagesTs' subfolders!
20
+ The converted dataset can be found in $nnUNet_raw_data_base/nnUNet_raw_data ($nnUNet_raw_data_base is the folder for
21
+ raw data that you specified during installation)
22
+ 4) You can now run nnU-Nets pipeline configuration (and the preprocessing) with the following line:
23
+ ```bash
24
+ nnUNet_plan_and_preprocess -t 4
25
+ ```
26
+ Where 4 refers to the task ID of the Hippocampus dataset.
27
+ 5) Now you can already start network training. This is how you train a 3d full resolution U-Net on the Hippocampus dataset:
28
+ ```bash
29
+ nnUNet_train 3d_fullres nnUNetTrainerV2 4 0
30
+ ```
31
+ nnU-Net by default runs all trainings as a 5-fold cross-validation. The command above will run only the training for the
32
+ first fold (fold 0). 4 is the task identifier of the Hippocampus dataset. Training one fold should take about 9
33
+ hours on a modern GPU.
34
+
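+ If you do want to run the full 5-fold cross-validation, you can simply loop over the folds:
+
+ ```bash
+ for FOLD in 0 1 2 3 4; do
+     nnUNet_train 3d_fullres nnUNetTrainerV2 4 $FOLD
+ done
+ ```
+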
35
+ This tutorial is only intended to demonstrate how easy it is to get nnU-Net running. You do not need to finish the
36
+ network training - pretrained models for the hippocampus task are available (see [here](../readme.md#run-inference)).
37
+
38
+ The only prerequisite for running nnU-Net on your custom dataset is to bring it into a structured, nnU-Net compatible
39
+ format. nnU-Net will take care of the rest. See [here](dataset_conversion.md) for instructions on how to convert
40
+ datasets into nnU-Net compatible format.
documentation/tutorials/custom_preprocessing.md ADDED
@@ -0,0 +1,60 @@
1
+ When you would like to change the way resampling during preprocessing is handled, or you would like to implement
2
+ a custom normalization scheme, you need to create a new custom preprocessor class and an ExperimentPlanner to go along
3
+ with it. While this may appear cumbersome, the great thing about this approach is that the same code will be used for
4
+ inference as well, thus guaranteeing that images are preprocessed properly (i.e. the way the model expects).
5
+
6
+ In this tutorial we will implement a custom normalization scheme for the Task120 Massachusetts Road Segmentation. Make
7
+ sure to download the dataset and run the code in [Task120_Massachusetts_RoadSegm.py](../../nnunet/dataset_conversion/Task120_Massachusetts_RoadSegm.py) prior to this tutorial.
8
+
9
+ The images in the dataset are RGB with a value range of [0, 255]. nnU-Net's default normalization scheme will normalize
10
+ each color channel independently to have mean 0 and standard deviation 1. This works reasonably well, but may result
11
+ in a shift of the color channels relative to each other and thus disturb the model's performance. To address that, the new
12
+ normalization will rescale the value range from [0, 255] to [0, 1] by simply dividing the intensities of each image by
13
+ 255. Thus, there will no longer be a shift between the color channels.
14
+
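+ Conceptually, the new scheme boils down to this (a sketch of the idea only; the actual implementation lives in the preprocessor class referenced below):
+
+ ```python
+ import numpy as np
+
+ def scale_rgb_to_0_1(data: np.ndarray) -> np.ndarray:
+     """data: array of shape (channels, x, y) with values in [0, 255]."""
+     return data.astype(np.float32) / 255.0
+ ```
+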
15
+ The new preprocessor class is located in [preprocessor_scale_RGB_to_0_1.py](../../nnunet/preprocessing/custom_preprocessors/preprocessor_scale_RGB_to_0_1.py).
16
+ To acutally use it, we need to tell the ExperimentPlanner its name. For this purpose, it is best to create a new
17
+ ExperimentPlanner class. I created one and placed it in [experiment_planner_2DUNet_v21_RGB_scaleto_0_1.py](../../nnunet/experiment_planning/alternative_experiment_planning/normalization/experiment_planner_2DUNet_v21_RGB_scaleto_0_1.py).
18
+
19
+ Now go have a look at these two classes. Details are in the comments there.
20
+
21
+ To run the new preprocessor, you need to specify its accompanying ExperimentPlanner when running
22
+ `nnUNet_plan_and_preprocess`:
23
+
24
+ ```bash
25
+ nnUNet_plan_and_preprocess -t 120 -pl3d None -pl2d ExperimentPlanner2D_v21_RGB_scaleTo_0_1
26
+ ```
27
+
28
+ After that you can run the training:
29
+
30
+ ```bash
31
+ nnUNet_train 2d nnUNetTrainerV2 120 FOLD -p nnUNet_RGB_scaleTo_0_1
32
+ ```
33
+
34
+ Note that `nnUNet_RGB_scaleTo_0_1` is the plans identifier defined in our custom ExperimentPlanner. Specify it for all
35
+ nnUNet_* commands whenever you want to use the models resulting from this training.
36
+
37
+ Now let all 5 folds run for the original nnU-Net as well as the one that uses the newly defined normalization scheme.
38
+ To compare the results, you can make use of nnUNet_determine_postprocessing to get the necessary metrics, for example:
39
+
40
+ ```bash
41
+ nnUNet_determine_postprocessing -t 120 -tr nnUNetTrainerV2 -p nnUNet_RGB_scaleTo_0_1
42
+ ```
43
+
44
+ This will create a `cv_niftis_raw` and `cv_niftis_postprocessed` subfolder in the training output directory. In each
45
+ of these folders is a summary.json file that you can open with a regular text editor. In this file, there are metrics
46
+ for each training example in the dataset representing the outcome of the 5-fold cross-validation. At the very bottom
47
+ of the file, the metrics are aggregated through averaging (field "mean") and this is what you should be using to
48
+ compare the experiments. I recommend using the non-postprocessed summary.json (located in `cv_niftis_raw`) for this
49
+ because determining the postprocessing may actually overfit to the training dataset. Here are the results I obtained:
50
+
51
+ Vanilla nnU-Net: 0.7720\
52
+ new normalization scheme: 0.7711
53
+
54
+ (no improvement but hey it was worth a try!)
55
+
56
+ Remember to always place custom ExperimentPlanners in nnunet.experiment_planning (any file or submodule) and
57
+ preprocessors in nnunet.preprocessing (any file or submodule). Make sure to use unique names!
58
+
59
+ The example classes from this tutorial only work with 2D. You need to generate a separate set of planner and preprocessor
60
+ for 3D data (cumbersome, I know. Needs to be improved in the future).
documentation/tutorials/custom_spacing.md ADDED
@@ -0,0 +1,33 @@
1
+ Sometimes you want to set custom target spacings. This is done by creating a custom ExperimentPlanner.
2
+ Let's run this with the Task002_Heart example from the Medical Segmentation Decathlon. This dataset is not too large
3
+ and working with it is therefore a breeze!
4
+
5
+ This example requires you to have downloaded the dataset and converted it to nnU-Net format with
6
+ nnUNet_convert_decathlon_task
7
+
8
+ We need to run the nnUNet_plan_and_preprocess command with a custom 3d experiment planner to achieve this. I have
9
+ created an appropriate ExperimentPlanner and placed it in [experiment_planner_baseline_3DUNet_v21_customTargetSpacing_2x2x2.py](../../nnunet/experiment_planning/alternative_experiment_planning/target_spacing/experiment_planner_baseline_3DUNet_v21_customTargetSpacing_2x2x2.py)
10
+
11
+ This will set a hard-coded target spacing of 2x2x2 mm for the 3d_fullres configuration (3d_lowres is unchanged).
12
+ Go have a look at this ExperimentPlanner now.
13
+
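+ Its core looks roughly like this (a sketch; the real class in the linked file additionally sets a unique data_identifier and plans_fname so that nothing is overwritten):
+
+ ```python
+ import numpy as np
+ from nnunet.experiment_planning.experiment_planner_baseline_3DUNet_v21 import ExperimentPlanner3D_v21
+
+
+ class ExperimentPlanner3D_v21_customTargetSpacing_2x2x2(ExperimentPlanner3D_v21):
+     def get_target_spacing(self):
+         # ignore the dataset-derived spacing and always resample to 2x2x2 mm
+         return np.array([2., 2., 2.])
+ ```
+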
14
+ To run nnUNet_plan_and_preprocess with the new ExperimentPlanner, simply specify it:
15
+
16
+ `nnUNet_plan_and_preprocess -t 2 -pl2d None -pl3d ExperimentPlanner3D_v21_customTargetSpacing_2x2x2`
17
+
18
+ Note how we are disabling 2D preprocessing with `-pl2d None`. The ExperimentPlanner I created is only for 3D.
19
+ You will need to generate a separate one for 2D.
20
+
21
+ Once this is completed your task will have been preprocessed with the desired target spacing. You can use it by
22
+ specifying the new custom plans file that is linked to it (see
23
+ `ExperimentPlanner3D_v21_customTargetSpacing_2x2x2` source code) when running any nnUNet_* command, for example:
24
+
25
+ `nnUNet_train 3d_fullres nnUNetTrainerV2 2 FOLD -p nnUNetPlansv2.1_trgSp_2x2x2`
26
+
27
+ (make sure to omit the `_plans_3D.pkl` suffix!)
28
+
29
+ **TODO**: how to compare with the default run?
30
+
31
+ IMPORTANT: When creating custom ExperimentPlanners, make sure to always place them under a unique class name somewhere
32
+ in the nnunet.experiment_planning module. If you create subfolders, make sure they contain an __init__.py file
33
+ (can be empty). If you fail to do so nnU-Net will not be able to locate your ExperimentPlanner and crash!
documentation/tutorials/edit_plans_files.md ADDED
@@ -0,0 +1,141 @@
1
+ Changing the plans files grants you a lot of flexibility: You can depart from nnU-Net's default configuration and play
2
+ with different U-Net topologies, batch sizes and patch sizes. It is a powerful tool!
3
+ To better understand the components describing the network topology in our plans files, please read section 6.2
4
+ in the [supplementary information](https://static-content.springer.com/esm/art%3A10.1038%2Fs41592-020-01008-z/MediaObjects/41592_2020_1008_MOESM1_ESM.pdf)
5
+ (page 13) of our paper!
6
+
7
+ The goal of this tutorial is to demonstrate how to read and modify plans files and how to use them in your
8
+ experiments. The file used here works with Task120 and requires you to have downloaded the dataset, run
9
+ nnunet.dataset_conversion.Task120_Massachusetts_RoadSegm.py and then run nnUNet_plan_and_preprocess for it.
10
+
11
+ Note that this task is 2D only, but the same principles we use here can be easily extended to 3D and other tasks as well.
12
+
13
+ The output of `nnUNet_plan_and_preprocess` for this task looks like this:
14
+
15
+ [{'batch_size': 2,
16
+ 'num_pool_per_axis': [8, 8],
17
+ 'patch_size': array([1280, 1024]),
18
+ 'median_patient_size_in_voxels': array([ 1, 1500, 1500]),
19
+ 'current_spacing': array([999., 1., 1.]),
20
+ 'original_spacing': array([999., 1., 1.]),
21
+ 'pool_op_kernel_sizes': [[2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2]],
22
+ 'conv_kernel_sizes': [[3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3]],
23
+ 'do_dummy_2D_data_aug': False}]
24
+
25
+ This is also essentially what is saved in the plans file under the key 'plans_per_stage'
26
+
27
+ For this task, nnU-Net intends to use a patch size of 1280x1024 and a U-Net architecture with 8 pooling
28
+ operations per axis. Due to GPU memory constraints, the batch size is just 2.
29
+
30
+ Knowing the dataset we could hypothesize that a different approach might produce better results: The decision
31
+ of whether a pixel belongs to 'road' or not does not depend on the large contextual information that the large
32
+ patch size (and U-Net architecture) offer and could instead be made with more local information. Training with
33
+ a batch size of just 2 in a dataset with 800 training cases means that each batch contains only limited variability.
34
+ So one possible conclusion could be that smaller patches but larger batch sizes might result in a better
35
+ segmentation outcome. Let's investigate (using the same GPU memory constraint, determined manually with trial
36
+ and error!):
37
+
38
+ Variant 1: patch size 512x512, batch size 12
39
+ The following snippet makes the necessary adaptations to the plans file
40
+
41
+ ```python
42
+ from batchgenerators.utilities.file_and_folder_operations import *
43
+ import numpy as np
44
+ from nnunet.paths import preprocessing_output_dir
45
+ task_name = 'Task120_MassRoadsSeg'
46
+
47
+ # if it breaks upon loading the plans file, make sure to run the Task120 dataset conversion and
48
+ # nnUNet_plan_and_preprocess first!
49
+ plans_fname = join(preprocessing_output_dir, task_name, 'nnUNetPlansv2.1_plans_2D.pkl')
50
+ plans = load_pickle(plans_fname)
51
+ plans['plans_per_stage'][0]['batch_size'] = 12
52
+ plans['plans_per_stage'][0]['patch_size'] = np.array((512, 512))
53
+ plans['plans_per_stage'][0]['num_pool_per_axis'] = [7, 7]
54
+ # because we changed the num_pool_per_axis, we need to change conv_kernel_sizes and pool_op_kernel_sizes as well!
55
+ plans['plans_per_stage'][0]['pool_op_kernel_sizes'] = [[2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2]]
56
+ plans['plans_per_stage'][0]['conv_kernel_sizes'] = [[3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3]]
57
+ # for a network with num_pool_per_axis [7,7] the correct length of pool kernel sizes is 7 and the length of conv
58
+ # kernel sizes is 8! Note that you can also change these numbers if you believe it makes sense. A pool kernel size
59
+ # of 1 will result in no pooling along that axis, a kernel size of 3 will reduce the size of the feature map
60
+ # representations by a factor of 3 instead of 2.
61
+
62
+ # save the plans under a new plans name. Note that the new plans file must end with _plans_2D.pkl!
63
+ save_pickle(plans, join(preprocessing_output_dir, task_name, 'nnUNetPlansv2.1_ps512_bs12_plans_2D.pkl'))
64
+ ```
65
+
66
+
67
+ Variant 2: patch size 256x256, batch size 60
68
+
69
+ ```python
70
+ from batchgenerators.utilities.file_and_folder_operations import *
71
+ import numpy as np
72
+ from nnunet.paths import preprocessing_output_dir
73
+ task_name = 'Task120_MassRoadsSeg'
74
+ plans_fname = join(preprocessing_output_dir, task_name, 'nnUNetPlansv2.1_plans_2D.pkl')
75
+ plans = load_pickle(plans_fname)
76
+ plans['plans_per_stage'][0]['batch_size'] = 60
77
+ plans['plans_per_stage'][0]['patch_size'] = np.array((256, 256))
78
+ plans['plans_per_stage'][0]['num_pool_per_axis'] = [6, 6]
79
+ plans['plans_per_stage'][0]['pool_op_kernel_sizes'] = [[2, 2], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2]]
80
+ plans['plans_per_stage'][0]['conv_kernel_sizes'] = [[3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3]]
81
+ save_pickle(plans, join(preprocessing_output_dir, task_name, 'nnUNetPlansv2.1_ps256_bs60_plans_2D.pkl'))
82
+ ```
83
+
84
+ You can now use these custom plans files to train the networks and compare the results! Remember that all nnUNet_*
85
+ commands have the -h argument to display their arguments. nnUNet_train supports custom plans via the -p argument.
86
+ Custom plans must be the prefix, so here this is everything except '_plans_2D.pkl':
87
+
88
+ Variant 1:
89
+ ```bash
90
+ nnUNet_train 2d nnUNetTrainerV2 120 FOLD -p nnUNetPlansv2.1_ps512_bs12
91
+ ```
92
+
93
+ Variant 2:
94
+ ```bash
95
+ nnUNet_train 2d nnUNetTrainerV2 120 FOLD -p nnUNetPlansv2.1_ps256_bs60
96
+ ```
97
+
98
+
99
+ Let all 5 folds run for each plans file (original and the two variants). To compare the results, you can make use of
100
+ nnUNet_determine_postprocessing to get the necessary metrics, for example:
101
+
102
+ ```bash
103
+ nnUNet_determine_postprocessing -t 120 -tr nnUNetTrainerV2 -p nnUNetPlansv2.1_ps512_bs12 -m 2d
104
+ ```
105
+
106
+ This will create a `cv_niftis_raw` and `cv_niftis_postprocessed` subfolder in the training output directory. In each
107
+ of these folders is a summary.json file that you can open with a regular text editor. In this file, there are metrics
108
+ for each training example in the dataset representing the outcome of the 5-fold cross-validation. At the very bottom
109
+ of the file, the metrics are aggregated through averaging (field "mean") and this is what you should be using to
110
+ compare the experiments. I recommend using the non-postprocessed summary.json (located in `cv_niftis_raw`) for this
111
+ because determining the postprocessing may actually overfit to the training dataset. Here are the results I obtained:
112
+
113
+ Vanilla nnU-Net: 0.7720\
114
+ Variant 1: 0.7724\
115
+ Variant 2: 0.7734
116
+
117
+ The results are remarkably similar and I would not necessarily conclude that such a small improvement in Dice is a
118
+ significant outcome. Nonetheless it was worth a try :-)
119
+
120
+ Despite the results shown here I would like to emphasize that modifying the plans file can be an extremely powerful
121
+ tool to improve the performance of nnU-Net on some datasets. You never know until you try it.
122
+
123
+ **ADDITIONAL INFORMATION (READ THIS!)**
124
+
125
+ - when working with 3d plans ('nnUNetPlansv2.1_plans_3D.pkl') the 3d_lowres and 3d_fullres stage will be encoded
126
+ in the same plans file. If len(plans['plans_per_stage']) == 2, then [0] is the 3d_lowres and [1] is the
127
+ 3d_fullres variant. If len(plans['plans_per_stage']) == 1 then [0] will be 3d_fullres and 3d_cascade_fullres
128
+ (they use the same plans).
129
+
130
+ - 'pool_op_kernel_sizes' together with 'patch_size' determines the size of the feature map
131
+ representations at the bottleneck. For Variant 1 & 2 presented here, the size of the feature map representation is
132
+
133
+ `print(plans['plans_per_stage'][0]['patch_size'] / np.prod(plans['plans_per_stage'][0]['pool_op_kernel_sizes'], 0))`
134
+
135
+ > [4., 4.]
136
+
137
+ If you see a non-integer number here, your model will crash! Make sure these are always integers!
138
+ nnU-Net will never create smaller bottlenecks than 4!
139
+
140
+ - do not change the 'current_spacing' in the plans file! This will not work properly. To change the target spacing,
141
+ have a look at the [custom spacing](custom_spacing.md) tutorial.
documentation/using_nnUNet_as_baseline.md ADDED
@@ -0,0 +1,4 @@
1
+ (The U-Net is the current punching bag of methods development. nnU-Net is going to be that looking forward. That is
2
+ cool (great, in fact!), but it should be done correctly. Here are tips on how to benchmark against nnU-Net)
3
+
4
+ This is work in progress
evaluate_nnUNet.py ADDED
@@ -0,0 +1,656 @@
1
+ from argparse import ArgumentParser
2
+ import os
3
+ import numpy as np
4
+ from sklearn.metrics import precision_score, recall_score, f1_score, jaccard_score
5
+ from data_processing.data_postprocessing import postprocess_zone_segmenation, postprocess_front_segmenation, extract_front_from_zones
6
+ import torch.nn as nn
7
+ #from segmentation_models_pytorch.losses.dice import DiceLoss
8
+ #from PIL import Image
9
+ #from models.front_segmentation_model import DistanceMapBCE
10
+ import re
11
+ from pathlib import Path
12
+ import cv2
13
+ import scipy.stats as st
14
+ from scipy.spatial import distance
15
+ import skimage
16
+ import matplotlib.pyplot as plt
17
+ from skimage.morphology import skeletonize
18
+ import json
19
+ import plotly.graph_objects as go
20
+ import plotly.express as px
21
+ import plotly.io as pio
22
+ import os
23
+ pio.kaleido.scope.mathjax = None
24
+
25
+
26
+ def front_error(prediction, label):
27
+ """
28
+ prediction: mask of the front prediction (WxH)
29
+ label: mask of the front label (WxH)
30
+
31
+ returns the mean distance of the two fronts
32
+ """
33
+ front_is_present_flag = True
34
+ polyline_pred = np.nonzero(prediction)
35
+ polyline_label = np.nonzero(label)
36
+
37
+ # Generate Nx2 matrix of pixels that represent the front
38
+ pred_coords = np.array(list(zip(polyline_pred[0], polyline_pred[1])))
39
+ mask_coords = np.array(list(zip(polyline_label[0], polyline_label[1])))
40
+
41
+ # Return NaN if front is not detected in either pred or mask
42
+ if pred_coords.shape[0] == 0 or mask_coords.shape[0] == 0:
43
+ front_is_present_flag = False
44
+ return front_is_present_flag, np.nan, np.nan, np.nan
45
+
46
+ # Generate the pairwise distances between each point and the closest point in the other array
47
+
48
+ distances1 = distance.cdist(pred_coords, mask_coords).min(axis=1)
49
+
50
+ distances2 = distance.cdist(mask_coords, pred_coords).min(axis=1)
51
+ distances = np.concatenate((distances1, distances2))
52
+
53
+ # Calculate the average distance between each point and the closest point in the other array
54
+ mean_distance = np.mean(distances)
55
+ median_distance = np.median(distances)
56
+ return front_is_present_flag, mean_distance, median_distance, distances
57
+
58
+
59
+ def multi_class_metric(metric_function, complete_predicted_mask, complete_target):
60
+ metrics = []
61
+ metric_na, metric_stone, metric_glacier, metric_ocean = metric_function(np.ndarray.flatten(complete_target),
62
+ np.ndarray.flatten(complete_predicted_mask),
63
+ average=None)
64
+ metric_macro_average = (metric_na + metric_stone + metric_glacier + metric_ocean) / 4
65
+ metrics.append(metric_macro_average)
66
+ metrics.append(metric_na)
67
+ metrics.append(metric_stone)
68
+ metrics.append(metric_glacier)
69
+ metrics.append(metric_ocean)
70
+ return metrics
71
+
72
+
73
+ def get_matching_out_of_folder(file_name, folder):
74
+ files = os.listdir(folder)
75
+ matching_files = [a for a in files if
76
+ re.match(pattern=os.path.split(file_name)[1][:-4], string=os.path.split(a)[1])]
77
+ if len(matching_files) > 1:
78
+ print("Something went wrong!")
79
+ print(f"targets_matching: {matching_files}")
80
+ if len(matching_files) < 1:
81
+ print("Something went wrong! No matches found")
82
+ return matching_files[0]
83
+
84
+
85
+ def turn_colors_to_class_labels_zones(mask):
86
+ mask_class_labels = np.copy(mask)
87
+ mask_class_labels[mask == 0] = 0
88
+ mask_class_labels[mask == 64] = 1
89
+ mask_class_labels[mask == 127] = 2
90
+ mask_class_labels[mask == 254] = 3
91
+ return mask_class_labels
92
+
93
+
94
+ def turn_colors_to_class_labels_front(mask):
95
+ mask_class_labels = np.copy(mask)
96
+ mask_class_labels[mask == 0] = 0
97
+ mask_class_labels[mask == 255] = 1
98
+ return mask_class_labels
99
+
100
+
101
+ def print_zone_metrics(metric_name, list_of_metrics):
102
+ metrics = [metric for [metric, _, _, _, _] in list_of_metrics if not np.isnan(metric)]
103
+ metrics_na = [metric_na for [_, metric_na, _, _, _] in list_of_metrics if not np.isnan(metric_na)]
104
+ metrics_stone = [metric_stone for [_, _, metric_stone, _, _] in list_of_metrics if not np.isnan(metric_stone)]
105
+ metrics_glacier = [metric_glacier for [_, _, _, metric_glacier, _] in list_of_metrics if not np.isnan(metric_glacier)]
106
+ metrics_ocean = [metric_ocean for [_, _, _, _, metric_ocean] in list_of_metrics if not np.isnan(metric_ocean)]
107
+ result = {}
108
+ print(f"Average {metric_name}: {sum(metrics) / len(metrics)}")
109
+ result[f'Average_{metric_name}'] = sum(metrics) / len(metrics)
110
+ print(f"Average {metric_name} NA Area: {sum(metrics_na) / len(metrics_na)}")
111
+ result[f'Average_{metric_name}_NA_Area'] = sum(metrics_na) / len(metrics_na)
112
+ print(f"Average {metric_name} Stone: {sum(metrics_stone) / len(metrics_stone)}")
113
+ result[f"Average_{metric_name}_Stone"] = sum(metrics_stone) / len(metrics_stone)
114
+ print(f"Average {metric_name} Glacier: {sum(metrics_glacier) / len(metrics_glacier)}")
115
+ result[f"Average_{metric_name}_Glacier"] = sum(metrics_glacier) / len(metrics_glacier)
116
+ print(f"Average {metric_name} Ocean and Ice Melange: {sum(metrics_ocean) / len(metrics_ocean)}")
117
+ result[f"Average_{metric_name}_Ocean_and_Ice_Melange"] = sum(metrics_ocean) / len(metrics_ocean)
118
+
119
+ return result
120
+
121
+ def print_front_metric(name, metric):
122
+ result = {}
123
+ print(f"Average {name}: {sum(metric) / len(metric)}")
124
+ result[f"Average {name}"] = sum(metric) / len(metric)
125
+ return result
126
+
127
+
128
+ def mask_prediction_with_bounding_box(post_complete_predicted_mask, file_name, bounding_boxes_directory):
129
+ matching_bounding_box_file = get_matching_out_of_folder(file_name, bounding_boxes_directory)
130
+ with open(os.path.join(bounding_boxes_directory, matching_bounding_box_file)) as f:
131
+ coord_file_lines = f.readlines()
132
+ left_upper_corner_x, left_upper_corner_y = [round(float(coord)) for coord in coord_file_lines[1].split(",")]
133
+ left_lower_corner_x, left_lower_corner_y = [round(float(coord)) for coord in coord_file_lines[2].split(",")]
134
+ right_lower_corner_x, right_lower_corner_y = [round(float(coord)) for coord in coord_file_lines[3].split(",")]
135
+ right_upper_corner_x, right_upper_corner_y = [round(float(coord)) for coord in coord_file_lines[4].split(",")]
136
+
137
+ # Make sure the Bounding Box coordinates are within the image
138
+ if left_upper_corner_x < 0: left_upper_corner_x = 0
139
+ if left_lower_corner_x < 0: left_lower_corner_x = 0
140
+ if right_upper_corner_x > len(post_complete_predicted_mask[0]): right_upper_corner_x = len(post_complete_predicted_mask[0]) - 1
141
+ if right_lower_corner_x > len(post_complete_predicted_mask[0]): right_lower_corner_x = len(post_complete_predicted_mask[0]) - 1
142
+ if left_upper_corner_y > len(post_complete_predicted_mask): left_upper_corner_y = len(post_complete_predicted_mask) - 1
143
+ if left_lower_corner_y < 0: left_lower_corner_y = 0
144
+ if right_upper_corner_y > len(post_complete_predicted_mask): right_upper_corner_y = len(post_complete_predicted_mask) - 1
145
+ if right_lower_corner_y < 0: right_lower_corner_y = 0
146
+
147
+ # remember cv2 images have the shape (height, width)
148
+ post_complete_predicted_mask[:right_lower_corner_y, :] = 0.0
149
+ post_complete_predicted_mask[left_upper_corner_y:, :] = 0.0
150
+ post_complete_predicted_mask[:, :left_upper_corner_x] = 0.0
151
+ post_complete_predicted_mask[:, right_lower_corner_x:] = 0.0
152
+
153
+ return post_complete_predicted_mask
154
+
155
+
156
+ def post_processing(target_masks, complete_predicted_masks, bounding_boxes_directory, complete_test_directory):
157
+ meter_threshold = 750 # in meter
158
+ print("Post-processing ...\n\n")
159
+ for file_name in complete_predicted_masks:
160
+ prediction_name = file_name
161
+ if file_name.endswith('_zone.png'):
162
+ file_name = file_name[:-len("_zone.png")] + ".png"
163
+ if file_name.endswith('_front.png'):
164
+ file_name = file_name[:-len("_front.png")] + ".png"
165
+
166
+ print(f"File: {file_name}")
167
+ resolution = int(os.path.split(file_name)[1][:-4].split('_')[-3])
168
+ # pixel_threshold (pixel) * resolution (m/pixel) = meter_threshold (m)
169
+ pixel_threshold = meter_threshold / resolution
170
+ complete_predicted_mask = cv2.imread(os.path.join(complete_test_directory, prediction_name).__str__(), cv2.IMREAD_GRAYSCALE)
171
+
172
+ if target_masks == "zones":
173
+ post_complete_predicted_mask = postprocess_zone_segmenation(complete_predicted_mask)
174
+ post_complete_predicted_mask = extract_front_from_zones(post_complete_predicted_mask, pixel_threshold)
175
+ else:
176
+ complete_predicted_mask_class_labels = turn_colors_to_class_labels_front(complete_predicted_mask)
177
+ post_complete_predicted_mask = postprocess_front_segmenation(complete_predicted_mask_class_labels, pixel_threshold)
178
+ post_complete_predicted_mask = post_complete_predicted_mask * 255
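+ # scale the binary front mask from {0, 1} to {0, 255} so cv2.imwrite stores a visible 8-bit image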
179
+
180
+ post_complete_predicted_mask = mask_prediction_with_bounding_box(post_complete_predicted_mask, file_name,
181
+ bounding_boxes_directory)
182
+ cv2.imwrite(os.path.join(complete_postprocessed_test_directory, file_name), post_complete_predicted_mask)
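+ # note: complete_postprocessed_test_directory is a module-level name defined in the __main__ block below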
183
+
184
+
185
+ def calculate_front_delineation_metric(complete_postprocessed_test_directory, post_processed_predicted_masks, directory_of_target_fronts, bounding_boxes_directory):
186
+ list_of_mean_front_errors = []
187
+ list_of_median_front_errors = []
188
+ list_of_all_front_errors = []
189
+ number_of_images_with_no_predicted_front = 0
190
+ results = {}
191
+ for file_name in post_processed_predicted_masks[:]:
192
+
193
+ post_processed_predicted_mask = cv2.imread(
194
+ os.path.join(complete_postprocessed_test_directory, file_name).__str__(), cv2.IMREAD_GRAYSCALE)
195
+ matching_target_file = get_matching_out_of_folder(file_name, directory_of_target_fronts)
196
+ target_front = cv2.imread(os.path.join(directory_of_target_fronts, matching_target_file).__str__(),
197
+ cv2.IMREAD_GRAYSCALE)
198
+ if file_name.endswith("_front.png"):
199
+ resolution = int(os.path.split(file_name)[1][:-4].split('_')[-4])
200
+ else:
201
+ resolution = int(os.path.split(file_name)[1][:-4].split('_')[-3])
202
+
203
+ # images need to be turned into a Tensor [0, ..., n_classes-1]
204
+ post_processed_predicted_mask_class_labels = turn_colors_to_class_labels_front(post_processed_predicted_mask)
205
+ target_front_class_labels = turn_colors_to_class_labels_front(target_front)
206
+
207
+ if file_name.endswith('_front.png'):
208
+ post_processed_predicted_mask_class_labels = mask_prediction_with_bounding_box(post_processed_predicted_mask_class_labels, file_name[:-len('_front.png')]+'.png', bounding_boxes_directory)
209
+ post_processed_predicted_mask_class_labels = skeletonize(post_processed_predicted_mask_class_labels)
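+ # thin the predicted front to a one-pixel-wide curve so the distance error is measured between comparable line representations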
210
+ front_is_present_flag, mean_error, median_error, errors = front_error(
211
+ post_processed_predicted_mask_class_labels, target_front_class_labels)
212
+
213
+ if not front_is_present_flag:
214
+ number_of_images_with_no_predicted_front += 1
215
+ else:
216
+ list_of_mean_front_errors.append(resolution * mean_error)
217
+ list_of_median_front_errors.append(resolution * median_error)
218
+ list_of_all_front_errors = np.concatenate((list_of_all_front_errors, resolution * errors))
219
+ print(f"Number of images with no predicted front: {number_of_images_with_no_predicted_front}")
220
+ results["Number_no_front"] = number_of_images_with_no_predicted_front
221
+ if number_of_images_with_no_predicted_front >= len(post_processed_predicted_masks):
222
+ print(f"Number of images with no predicted front is equal to complete set of images. No metrics can be calculated.")
223
+ return [], {}
224
+ list_of_mean_front_errors_without_nan = [front_error for front_error in list_of_mean_front_errors if
225
+ not np.isnan(front_error)]
226
+ list_of_median_front_errors_without_nan = [front_error for front_error in list_of_median_front_errors if
227
+ not np.isnan(front_error)]
228
+ print(f"Mean-mean distance error (in meters): {sum(list_of_mean_front_errors_without_nan) / len(list_of_mean_front_errors_without_nan)}")
229
+ results["Mean_mean_distance"] = sum(list_of_mean_front_errors_without_nan) / len(list_of_mean_front_errors_without_nan)
230
+ print(f"Mean-median distance error (in meters): {sum(list_of_median_front_errors_without_nan) / len(list_of_median_front_errors_without_nan)}")
231
+ results["Mean_median_distance"] = sum(list_of_median_front_errors_without_nan) / len(list_of_median_front_errors_without_nan)
232
+
233
+ list_of_mean_front_errors_without_nan = np.array(list_of_mean_front_errors_without_nan)
234
+ list_of_median_front_errors_without_nan = np.array(list_of_median_front_errors_without_nan)
235
+ print(f"Median-mean distance error (in meters): {np.median(list_of_mean_front_errors_without_nan)}")
236
+ results["Median_mean_distance"] = np.median(list_of_mean_front_errors_without_nan)
237
+ print(f"Median-median distance error (in meters): {np.median(list_of_median_front_errors_without_nan)}")
238
+ results["Median_median_distance"] = np.median(list_of_median_front_errors_without_nan)
239
+
240
+ list_of_all_front_errors_without_nan = [front_error for front_error in list_of_all_front_errors if
241
+ not np.isnan(front_error)]
242
+ list_of_all_front_errors_without_nan = np.array(list_of_all_front_errors_without_nan)
243
+ confidence_interval = st.norm.interval(0.95,  # passed positionally; the 'alpha' keyword was renamed 'confidence' in SciPy 1.9
244
+ loc=np.mean(list_of_all_front_errors_without_nan),
245
+ scale=st.sem(list_of_all_front_errors_without_nan))
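+ # i.e. a 95% normal-approximation interval, mean +/- 1.96 * standard error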
246
+ mean = np.mean(list_of_all_front_errors_without_nan)
247
+ std = np.std(list_of_all_front_errors_without_nan)
248
+ print(f"Confidence interval: {confidence_interval}, mean: {mean}, standard deviation: {std}")
249
+ results["Confidence_interval"] = confidence_interval
250
+ results['mean'] = mean
251
+ results['standard_deviation'] = std
252
+ return list_of_mean_front_errors_without_nan, results
253
+
254
+ def calculate_segmentation_metrics(target_mask_modality, complete_predicted_masks, complete_test_directory,
255
+ directory_of_complete_targets):
256
+ print("Calculate segmentation metrics ...\n\n")
257
+ list_of_ious = []
258
+ list_of_precisions = []
259
+ list_of_recalls = []
260
+ list_of_f1_scores = []
261
+ result = {}
262
+ for file_name in complete_predicted_masks:
263
+ print(f"File: {file_name}")
264
+ complete_predicted_mask = cv2.imread(os.path.join(complete_test_directory, file_name).__str__(),
265
+ cv2.IMREAD_GRAYSCALE)
266
+ matching_target_file = get_matching_out_of_folder(file_name, directory_of_complete_targets)
267
+ complete_target = cv2.imread(os.path.join(directory_of_complete_targets, matching_target_file).__str__(),
268
+ cv2.IMREAD_GRAYSCALE)
269
+
270
+ if target_mask_modality == "zones":
271
+ # images need to be turned into a Tensor [0, ..., n_classes-1]
272
+ complete_predicted_mask_class_labels = turn_colors_to_class_labels_zones(complete_predicted_mask)
273
+ complete_target_class_labels = turn_colors_to_class_labels_zones(complete_target)
274
+ # Segmentation evaluation metrics
275
+ list_of_ious.append(
276
+ multi_class_metric(jaccard_score, complete_predicted_mask_class_labels, complete_target_class_labels))
277
+ list_of_precisions.append(
278
+ multi_class_metric(precision_score, complete_predicted_mask_class_labels, complete_target_class_labels))
279
+ list_of_recalls.append(
280
+ multi_class_metric(recall_score, complete_predicted_mask_class_labels, complete_target_class_labels))
281
+ list_of_f1_scores.append(
282
+ multi_class_metric(f1_score, complete_predicted_mask_class_labels, complete_target_class_labels))
283
+ else:
284
+ # images need to be turned into a Tensor [0, ..., n_classes-1]
285
+ complete_predicted_mask_class_labels = turn_colors_to_class_labels_front(complete_predicted_mask)
286
+ complete_target_class_labels = turn_colors_to_class_labels_front(complete_target)
287
+ # Segmentation evaluation metrics
288
+ flattened_complete_target_class_labels = np.ndarray.flatten(complete_target_class_labels)
289
+ flattened_complete_predicted_mask_class_labels = np.ndarray.flatten(complete_predicted_mask_class_labels)
290
+ list_of_ious.append(
291
+ jaccard_score(flattened_complete_target_class_labels, flattened_complete_predicted_mask_class_labels))
292
+ list_of_precisions.append(
293
+ precision_score(flattened_complete_target_class_labels, flattened_complete_predicted_mask_class_labels))
294
+ list_of_recalls.append(
295
+ recall_score(flattened_complete_target_class_labels, flattened_complete_predicted_mask_class_labels))
296
+ list_of_f1_scores.append(
297
+ f1_score(flattened_complete_target_class_labels, flattened_complete_predicted_mask_class_labels))
298
+
299
+ if target_mask_modality == "zones":
300
+ result_precision = print_zone_metrics("Precision", list_of_precisions)
301
+ result["Zone_Precision"] = result_precision
302
+ result_recall = print_zone_metrics("Recall", list_of_recalls)
+ result["Zone_Recall"] = result_recall
304
+ result_f1 = print_zone_metrics("F1 Score", list_of_f1_scores)
305
+ result["Zone_F1"] = result_f1
306
+ result_iou = print_zone_metrics("IoU", list_of_ious)
307
+ result["Zone_IoU"] = result_iou
308
+ else:
309
+ if len(list_of_precisions) > 0:
310
+ result_precisions = print_front_metric("Precision", list_of_precisions)
+ result["Front_Precision"] = result_precisions
312
+ if len(list_of_recalls) > 0:
313
+ result_recall = print_front_metric("Recall", list_of_recalls)
314
+ result["Front_Recall"] = result_recall
315
+ if len(list_of_f1_scores) > 0:
316
+ result_f1 = print_front_metric("F1 Score", list_of_f1_scores)
317
+ result["Front_F1"] = result_f1
318
+ if len(list_of_ious) > 0:
319
+ result_iou = print_front_metric("IoU", list_of_ious)
320
+ result["Front_IoU"] = result_iou
321
+
322
+ return result
323
+
324
+ def check_whether_winter_half_year(name):
325
+ split_name = name[:-4].split('_')
326
+ if split_name[0] == "COL" or split_name[0] == "JAC":
327
+ nord_halbkugel = True
328
+ else: # Jorum, Maple, Crane, SI, DBE
329
+ nord_halbkugel = False
330
+ month = int(split_name[1].split('-')[1])
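+ # months April-August count as the summer half-year on the northern hemisphere and as the
+ # winter half-year on the southern hemisphere (COL and JAC are the northern-hemisphere glaciers)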
331
+ if nord_halbkugel:
332
+ if month < 4 or month > 8:
333
+ winter = True
334
+ else:
335
+ winter = False
336
+ else:
337
+ if month < 4 or month > 8:
338
+ winter = False
339
+ else:
340
+ winter = True
341
+ return winter
342
+
343
+
344
+ def front_delineation_metric(modality, complete_postprocessed_test_directory, directory_of_target_fronts, bounding_boxes_directory):
345
+ print("Calculating distance errors ...\n\n")
346
+ if modality == 'front':
347
+ post_processed_predicted_masks = list(file for file in os.listdir(complete_postprocessed_test_directory) if file.endswith('_front.png'))
348
+
349
+ elif modality == 'zone':
350
+ post_processed_predicted_masks = list(file for file in os.listdir(complete_postprocessed_test_directory))
351
+
352
+ print("")
353
+ print("####################################################################")
354
+ print(f"# Results for all images")
355
+ print("####################################################################")
356
+ fig = px.box(None, points="all", template="plotly_white", log_x=True, height=300)
357
+ G10 = px.colors.qualitative.G10
358
+ width = 0.5
359
+ list_of_mean_front_errors_without_nan, result_all = calculate_front_delineation_metric(complete_postprocessed_test_directory, post_processed_predicted_masks, directory_of_target_fronts, bounding_boxes_directory)
360
+ np.savetxt(os.path.join(complete_postprocessed_test_directory, os.pardir, "distance_errors.txt"), list_of_mean_front_errors_without_nan)
361
+ fig.add_trace(go.Box(x=list_of_mean_front_errors_without_nan, marker_color='orange', boxmean=True, boxpoints='all', name='all', width=width))
362
+
363
+ results = {}
364
+ results['Result_all'] = result_all
365
+
366
+ # Season subsetting
367
+ for season in ["winter", "summer"]:
368
+ print("")
369
+ print("####################################################################")
370
+ print(f"# Results for only images in {season}")
371
+ print("####################################################################")
372
+ subset_of_predictions = []
373
+ for file_name in post_processed_predicted_masks:
374
+ winter = check_whether_winter_half_year(file_name)
375
+ if (winter and season == "summer") or (not winter and season == "winter"):
376
+ continue
377
+ subset_of_predictions.append(file_name)
378
+ if len(subset_of_predictions) == 0: continue
379
+ all_errors, result_season = calculate_front_delineation_metric(complete_postprocessed_test_directory, subset_of_predictions, directory_of_target_fronts, bounding_boxes_directory)
380
+ if season == 'winter':
381
+ color = G10[9]
382
+ else:
383
+ color = G10[8]
384
+ print(season, np.mean(all_errors), np.std(all_errors))
385
+ fig.add_trace(go.Box(x=all_errors, marker_color=color, boxmean=True, boxpoints='all', name=season, width=width, legendrank=0))
386
+
387
+ results[season] = result_season
388
+ fig.update_layout(showlegend=False, font=dict(family="Times New Roma", size=12))
389
+ fig.update_xaxes(title='front delineation error (m)')
390
+ fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':['summer','winter','all']})
391
+ fig.update_traces(orientation='h') # horizontal box plots
392
+ fig.write_image("create_plots_new/output/error_season.pdf", format='pdf')
393
+
394
+
395
+ # Glacier subsetting
396
+ fig = px.box(None, points="all", template="plotly_white", log_x=True, height=300)
397
+ fig.add_trace(go.Box(x=list_of_mean_front_errors_without_nan, marker_color='orange', boxmean=True, boxpoints='all', name='all', width=width,legendrank=7))
398
+ color = {'COL': G10[3], 'Mapple': G10[4]}
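+ # only Mapple and COL are given colors here; the loop below would raise a KeyError for the other
+ # glaciers, so the test set presumably contains only these two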
399
+ for glacier in ["Mapple", "COL", "Crane", "DBE", "JAC", "Jorum", "SI"]:
400
+ print("")
401
+ print("####################################################################")
402
+ print(f"# Results for only images from {glacier}")
403
+ print("####################################################################")
404
+ subset_of_predictions = []
405
+ for file_name in post_processed_predicted_masks:
406
+ if not file_name[:-4].split('_')[0] == glacier:
407
+ continue
408
+ subset_of_predictions.append(file_name)
409
+ if len(subset_of_predictions) == 0: continue
410
+ all_errors, result_glacier = calculate_front_delineation_metric(complete_postprocessed_test_directory,subset_of_predictions, directory_of_target_fronts, bounding_boxes_directory)
411
+ print(glacier, np.mean(all_errors), np.std(all_errors))
412
+ fig.add_trace(
413
+ go.Box(x=all_errors, marker_color=color[glacier], boxmean=True, boxpoints='all', name=glacier, width=width))
414
+ results[glacier] = {}
415
+ results[glacier]['all'] = result_glacier
416
+ fig.update_layout(showlegend=False, font=dict(family="Times New Roma", size=12))
417
+ fig.update_xaxes(title='front delineation error (m)')
418
+ fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':['Mapple', 'COL', 'all']})
419
+ fig.update_traces(orientation='h') # horizontal box plots
420
+ fig.write_image("create_plots_new/output/error_glacier.pdf", format='pdf')
421
+
422
+ color = {'ERS': G10[9], 'RSAT': G10[1], 'ENVISAT': G10[8], 'PALSAR':G10[3], 'TSX':G10[4], 'TDX':G10[5], 'S1':G10[6]}
423
+ # Sensor subsetting
424
+ fig = px.box(None, points="all", template="plotly_white", log_x=True, height=500)
425
+ fig.add_trace(go.Box(x=list_of_mean_front_errors_without_nan, marker_color='orange', boxmean=True, boxpoints='all', name='all', width=width))
426
+
427
+ for sensor in ["RSAT", "S1", "ENVISAT", "ERS", "PALSAR", "TSX", "TDX"]:
428
+ print("")
429
+ print("####################################################################")
430
+ print(f"# Results for only images from {sensor}")
431
+ print("####################################################################")
432
+ subset_of_predictions = []
433
+ for file_name in post_processed_predicted_masks:
434
+ if not file_name[:-4].split('_')[2] == sensor:
435
+ continue
436
+ subset_of_predictions.append(file_name)
437
+ if len(subset_of_predictions) == 0: continue
438
+ all_errors, result_sensor = calculate_front_delineation_metric(complete_postprocessed_test_directory,subset_of_predictions, directory_of_target_fronts, bounding_boxes_directory)
439
+ print(sensor, np.mean(all_errors), np.std(all_errors))
440
+ fig.add_trace(
441
+ go.Box(x=all_errors, marker_color=color[sensor], boxmean=True, boxpoints='all', name=sensor, width=width))
442
+ results[sensor] = result_sensor
443
+ fig.update_layout(showlegend=False, font=dict(family="Times New Roma", size=12))
444
+ fig.update_xaxes(title='front delineation error (m)')
445
+ fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':[ 'S1','TDX','TSX','PALSAR', 'ENVISAT', 'ERS','all']})
446
+ fig.update_traces(orientation='h') # horizontal box plots
447
+ fig.write_image("create_plots_new/output/error_satellite.pdf", format='pdf')
448
+ # Resolution subsetting
450
+ fig = px.box(None, points="all", template="plotly_white", log_x=True)
451
+ fig.add_trace(go.Box(x=list_of_mean_front_errors_without_nan, marker_color='orange', boxmean=True, boxpoints='all', name='all', width=width))
452
+ color ={20: G10[9], 17:G10[8], 7:G10[3]}
453
+ for res in [20, 17, 7]:
454
+ print("")
455
+ print("####################################################################")
456
+ print(f"# Results for only images with a resolution of {res}")
457
+ print("####################################################################")
458
+ subset_of_predictions = []
459
+ for file_name in post_processed_predicted_masks:
460
+ if not int(file_name[:-4].split('_')[3]) == res:
461
+ continue
462
+ subset_of_predictions.append(file_name)
463
+ if len(subset_of_predictions) == 0: continue
464
+ all_errors, result_res = calculate_front_delineation_metric(complete_postprocessed_test_directory,subset_of_predictions, directory_of_target_fronts, bounding_boxes_directory)
465
+ fig.add_trace(
466
+ go.Box(x=all_errors, marker_color=color[res], boxmean=True, boxpoints='all', name=str(res), width=width))  # str() so the trace name matches the string categoryarray below
467
+ results[res] = result_res
468
+ fig.update_layout(showlegend=False, font=dict(family="Times New Roma", size=12))
469
+ fig.update_xaxes(title='front delineation error (m)')
470
+ fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':['7', '17', '20','all']})
471
+ fig.update_traces(orientation='h') # horizontal box plots
472
+ fig.write_image("create_plots_new/output/error_resolution.pdf", format='pdf')
473
+
474
+ # Season and glacier subsetting
475
+ for glacier in ["Mapple", "COL", "Crane", "DBE", "JAC", "Jorum", "SI"]:
476
+ for season in ["winter", "summer"]:
477
+ print("")
478
+ print("####################################################################")
479
+ print(f"# Results for only images in {season} and from {glacier}")
480
+ print("####################################################################")
481
+ subset_of_predictions = []
482
+ for file_name in post_processed_predicted_masks:
483
+ winter = check_whether_winter_half_year(file_name)
484
+ if not file_name[:-4].split('_')[0] == glacier:
485
+ continue
486
+ if (winter and season == "summer") or (not winter and season == "winter"):
487
+ continue
488
+ subset_of_predictions.append(file_name)
489
+ if len(subset_of_predictions) == 0: continue
490
+ _, results_gla_season = calculate_front_delineation_metric(complete_postprocessed_test_directory, subset_of_predictions, directory_of_target_fronts, bounding_boxes_directory)
491
+ results[glacier][season] = results_gla_season
492
+
493
+ return results
494
+
495
+ def visualizations(complete_postprocessed_test_directory, directory_of_target_fronts, directory_of_sar_images,
496
+ bounding_boxes_directory, visualizations_dir):
497
+ print("Creating visualizations ...\n\n")
498
+ post_processed_predicted_masks = os.listdir(os.path.join(complete_postprocessed_test_directory))
499
+ for file_name in post_processed_predicted_masks:
500
+ if not file_name.endswith('.png'):
501
+ continue
502
+ resolution = int(os.path.split(file_name)[1][:-4].split('_')[-3])
503
+ if resolution < 10:
504
+ dilation = 5
505
+ else:
506
+ dilation = 3
507
+
508
+ if file_name.endswith('_front.png'):
509
+ post_processed_predicted_mask = cv2.imread(os.path.join(complete_postprocessed_test_directory, file_name).__str__(), cv2.IMREAD_GRAYSCALE)
510
+ post_processed_predicted_mask = mask_prediction_with_bounding_box(post_processed_predicted_mask, file_name[:-len('_front.png')]+'.png', bounding_boxes_directory)
511
+ post_processed_predicted_mask[post_processed_predicted_mask > 1] = 1
512
+ post_processed_predicted_mask_skeletonized = skeletonize(post_processed_predicted_mask)
513
+ post_processed_predicted_mask = np.zeros(post_processed_predicted_mask_skeletonized.shape)
514
+ post_processed_predicted_mask[post_processed_predicted_mask_skeletonized] = 255
515
+ matching_target_file = get_matching_out_of_folder(file_name[:-len('_front.png')]+'.png', directory_of_target_fronts)
516
+ target_front = cv2.imread(os.path.join(directory_of_target_fronts, matching_target_file).__str__(), cv2.IMREAD_GRAYSCALE)
517
+ matching_sar_file = get_matching_out_of_folder(file_name[:-len('_front.png')]+'.png', directory_of_sar_images)
518
+ sar_image = cv2.imread(os.path.join(directory_of_sar_images, matching_sar_file).__str__(), cv2.IMREAD_GRAYSCALE)
519
+ elif file_name.endswith('_zone.png'):
520
+ continue
521
+ elif file_name.endswith('_recon.png'):
522
+ continue
523
+ else:
524
+ post_processed_predicted_mask = cv2.imread(
525
+ os.path.join(complete_postprocessed_test_directory, file_name).__str__(), cv2.IMREAD_GRAYSCALE)
526
+ matching_target_file = get_matching_out_of_folder(file_name, directory_of_target_fronts)
527
+ target_front = cv2.imread(os.path.join(directory_of_target_fronts, matching_target_file).__str__(),cv2.IMREAD_GRAYSCALE)
528
+ matching_sar_file = get_matching_out_of_folder(file_name, directory_of_sar_images)
529
+ sar_image = cv2.imread(os.path.join(directory_of_sar_images, matching_sar_file).__str__(),cv2.IMREAD_GRAYSCALE)
530
+
531
+
532
+ predicted_front = np.array(post_processed_predicted_mask)
533
+ ground_truth_front = np.array(target_front)
534
+ kernel = np.ones((dilation, dilation), np.uint8)
535
+ predicted_front = cv2.dilate(predicted_front, kernel, iterations=1)
536
+ ground_truth_front = cv2.dilate(ground_truth_front, kernel, iterations=1)
537
+
538
+ sar_image = np.array(sar_image)
539
+ sar_image_rgb = skimage.color.gray2rgb(sar_image)
540
+ sar_image_rgb = np.uint8(sar_image_rgb)
541
+
542
+ sar_image_rgb[predicted_front > 0] = [0, 255, 255] # b, g, r
543
+ sar_image_rgb[ground_truth_front > 0] = [255, 51, 51]
544
+ correct_prediction = np.logical_and(predicted_front, ground_truth_front)
545
+ sar_image_rgb[correct_prediction > 0] = [255, 0, 255] # [51, 255, 51] # [0, 153, 0]
546
+
547
+ # Insert Bounding Box
548
+ matching_bounding_box_file = get_matching_out_of_folder(file_name, bounding_boxes_directory)
549
+ with open(os.path.join(bounding_boxes_directory, matching_bounding_box_file)) as f:
550
+ coord_file_lines = f.readlines()
551
+ left_upper_corner_x, left_upper_corner_y = [round(float(coord)) for coord in coord_file_lines[1].split(",")]
552
+ left_lower_corner_x, left_lower_corner_y = [round(float(coord)) for coord in coord_file_lines[2].split(",")]
553
+ right_lower_corner_x, right_lower_corner_y = [round(float(coord)) for coord in coord_file_lines[3].split(",")]
554
+ right_upper_corner_x, right_upper_corner_y = [round(float(coord)) for coord in coord_file_lines[4].split(",")]
555
+
556
+ bounding_box = np.zeros((len(sar_image_rgb), len(sar_image_rgb[0])))
557
+ if left_upper_corner_x < 0: left_upper_corner_x = 0
558
+ if left_lower_corner_x < 0: left_lower_corner_x = 0
559
+ if right_upper_corner_x > len(sar_image_rgb[0]): right_upper_corner_x = len(sar_image_rgb[0]) - 1
560
+ if right_lower_corner_x > len(sar_image_rgb[0]): right_lower_corner_x = len(sar_image_rgb[0]) - 1
561
+ if left_upper_corner_y > len(sar_image_rgb): left_upper_corner_y = len(sar_image_rgb) - 1
562
+ if left_lower_corner_y < 0: left_lower_corner_y = 0
563
+ if right_upper_corner_y > len(sar_image_rgb): right_upper_corner_y = len(sar_image_rgb) - 1
564
+ if right_lower_corner_y < 0: right_lower_corner_y = 0
565
+
566
+ bounding_box[left_upper_corner_y, left_upper_corner_x:right_upper_corner_x] = 1
567
+ bounding_box[left_lower_corner_y, left_lower_corner_x:right_lower_corner_x] = 1
568
+ bounding_box[left_lower_corner_y:left_upper_corner_y, left_upper_corner_x] = 1
569
+ bounding_box[right_lower_corner_y:right_upper_corner_y, right_lower_corner_x] = 1
570
+ bounding_box = cv2.dilate(bounding_box, kernel, iterations=1)
571
+ sar_image_rgb[bounding_box > 0] = [255, 255, 0]
572
+
573
+ cv2.imwrite(os.path.join(visualizations_dir, file_name), sar_image_rgb)
574
+
575
+ def main(complete_test_directory, directory_of_complete_targets_zones, directory_of_complete_targets_fronts, directory_of_sar_images):
576
+ # ###############################################################################################
577
+ # CALCULATE SEGMENTATION METRICS (IoU & Hausdorff Distance)
578
+ # ###############################################################################################
579
+ complete_predicted_masks_zones = list(file for file in os.listdir(complete_test_directory) if file.endswith('_zone.png'))
580
+ complete_predicted_masks_fronts = list(file for file in os.listdir(complete_test_directory) if file.endswith('_front.png'))
581
+ src = Path(directory_of_sar_images).parent.parent.parent
582
+ bounding_boxes_directory = os.path.join(src, "data_raw", "bounding_boxes")
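+ # the bounding boxes are expected three directory levels above the SAR images, under data_raw/bounding_boxes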
583
+ results = {}
584
+ # only on zone
585
+
586
+ if len(complete_predicted_masks_zones) > 0:
587
+ results_seg = calculate_segmentation_metrics('zones', complete_predicted_masks_zones, complete_test_directory,
588
+ directory_of_complete_targets_zones,)
589
+ results['Zone_Segmentation'] = results_seg
590
+
591
+ if len(complete_predicted_masks_fronts) > 0:
592
+ results_seg = calculate_segmentation_metrics('fronts', complete_predicted_masks_fronts,
593
+ complete_test_directory,
594
+ directory_of_complete_targets_fronts, )
595
+ results['Front_Segmentation'] = results_seg
596
+
597
+ # ###############################################################################################
598
+ # POST-PROCESSING
599
+ # ###############################################################################################
600
+
603
+
604
+ if len(complete_predicted_masks_zones) > 0:
605
+ post_processing('zones', complete_predicted_masks_zones, bounding_boxes_directory, complete_test_directory)
606
+
607
+ # ###############################################################################################
608
+ # CALCULATE FRONT DELINEATION METRIC (Mean distance error)
609
+ # ###############################################################################################
610
+
611
+ if len(complete_predicted_masks_zones) > 0:
612
+ print("Front delineation from ZONE post processed")
613
+ results_zone = front_delineation_metric('zone', complete_postprocessed_test_directory, directory_of_complete_targets_fronts, bounding_boxes_directory)
614
+ results['Zone_Delineation'] = results_zone
615
+
616
+
617
+ if len(complete_predicted_masks_fronts) > 0:
618
+ print("Front delineation from FRONT directly")
619
+ results_front = front_delineation_metric('front', complete_test_directory, directory_of_complete_targets_fronts, bounding_boxes_directory)
620
+ results['Front_Delineation'] = results_front
621
+
622
+ with open(os.path.join(complete_test_directory, 'eval_results.json'), "w") as results_file:
+ json.dump(results, results_file)
624
+
625
+ # ###############################################################################################
626
+ # MAKE VISUALIZATIONS
627
+ # ###############################################################################################
628
+ if len(complete_predicted_masks_zones) > 0:
629
+ visualizations(complete_postprocessed_test_directory, directory_of_complete_targets_fronts, directory_of_sar_images,
630
+ bounding_boxes_directory, visualizations_dir)
631
+
632
+ if len(complete_predicted_masks_fronts) > 0:
633
+ front_prediction_dir = complete_test_directory
634
+
635
+ visualizations(front_prediction_dir, directory_of_complete_targets_fronts, directory_of_sar_images,
636
+ bounding_boxes_directory, visualizations_dir)
637
+
638
+
639
+ if __name__ == "__main__":
640
+ print("Start Evaluation")
641
+ parser = ArgumentParser(add_help=False)
642
+ parser.add_argument('--predictions', help="Directory with predictions as png")
+ parser.add_argument('--labels_fronts', help="Directory with front labels as png")
+ parser.add_argument('--labels_zones', help="Directory with zone labels as png")
+ parser.add_argument('--sar_images', help="Directory with SAR images")
646
+ hparams = parser.parse_args()
647
+
648
+ complete_test_directory = hparams.predictions
649
+ complete_postprocessed_test_directory = os.path.join(complete_test_directory, "postprocessed")
650
+
651
+ os.makedirs(complete_postprocessed_test_directory, exist_ok=True)
652
+
653
+ visualizations_dir = os.path.join(complete_test_directory, "visualization")
654
+ os.makedirs(visualizations_dir, exist_ok=True)
655
+
656
+ main(hparams.predictions, hparams.labels_zones, hparams.labels_fronts, hparams.sar_images)
nnunet/__init__.py ADDED
@@ -0,0 +1,7 @@
+ from __future__ import absolute_import
+ print("\n\nPlease cite the following paper when using nnUNet:\n\nIsensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "
+ "\"nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation.\" "
+ "Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z\n\n")
+ print("If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet\n")
+
+ from . import *
nnunet/configuration.py ADDED
@@ -0,0 +1,5 @@
+ import os
+
+ default_num_threads = 8 if 'nnUNet_def_n_proc' not in os.environ else int(os.environ['nnUNet_def_n_proc'])
+ RESAMPLING_SEPARATE_Z_ANISO_THRESHOLD = 3 # determines what threshold to use for resampling the low resolution axis
+ # separately (with NN)
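+ # e.g. launching as `nnUNet_def_n_proc=16 nnUNet_train ...` would use 16 processes (illustrative invocation)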
nnunet/dataset_conversion/Task017_BeyondCranialVaultAbdominalOrganSegmentation.py ADDED
@@ -0,0 +1,94 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ from nnunet.paths import nnUNet_raw_data
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+
21
+
22
+ if __name__ == "__main__":
23
+ base = "/media/yunlu/10TB/research/other_data/Multi-Atlas Labeling Beyond the Cranial Vault/RawData/"
24
+
25
+ task_id = 17
26
+ task_name = "AbdominalOrganSegmentation"
27
+ prefix = 'ABD'
28
+
29
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
30
+
31
+ out_base = join(nnUNet_raw_data, foldername)
32
+ imagestr = join(out_base, "imagesTr")
33
+ imagests = join(out_base, "imagesTs")
34
+ labelstr = join(out_base, "labelsTr")
35
+ maybe_mkdir_p(imagestr)
36
+ maybe_mkdir_p(imagests)
37
+ maybe_mkdir_p(labelstr)
38
+
39
+ train_folder = join(base, "Training/img")
40
+ label_folder = join(base, "Training/label")
41
+ test_folder = join(base, "Test/img")
42
+ train_patient_names = []
43
+ test_patient_names = []
44
+ train_patients = subfiles(train_folder, join=False, suffix='nii.gz')
45
+ for p in train_patients:
46
+ serial_number = int(p[3:7])
47
+ train_patient_name = f'{prefix}_{serial_number:03d}.nii.gz'
48
+ label_file = join(label_folder, f'label{p[3:]}')
49
+ image_file = join(train_folder, p)
50
+ shutil.copy(image_file, join(imagestr, f'{train_patient_name[:7]}_0000.nii.gz'))
51
+ shutil.copy(label_file, join(labelstr, train_patient_name))
52
+ train_patient_names.append(train_patient_name)
53
+
54
+ test_patients = subfiles(test_folder, join=False, suffix=".nii.gz")
55
+ for p in test_patients:
56
+ p = p[:-7]
57
+ image_file = join(test_folder, p + ".nii.gz")
58
+ serial_number = int(p[3:7])
59
+ test_patient_name = f'{prefix}_{serial_number:03d}.nii.gz'
60
+ shutil.copy(image_file, join(imagests, f'{test_patient_name[:7]}_0000.nii.gz'))
61
+ test_patient_names.append(test_patient_name)
62
+
63
+ json_dict = OrderedDict()
64
+ json_dict['name'] = "AbdominalOrganSegmentation"
65
+ json_dict['description'] = "Multi-Atlas Labeling Beyond the Cranial Vault Abdominal Organ Segmentation"
66
+ json_dict['tensorImageSize'] = "3D"
67
+ json_dict['reference'] = "https://www.synapse.org/#!Synapse:syn3193805/wiki/217789"
68
+ json_dict['licence'] = "see challenge website"
69
+ json_dict['release'] = "0.0"
70
+ json_dict['modality'] = {
71
+ "0": "CT",
72
+ }
73
+ json_dict['labels'] = OrderedDict({
74
+ "00": "background",
75
+ "01": "spleen",
76
+ "02": "right kidney",
77
+ "03": "left kidney",
78
+ "04": "gallbladder",
79
+ "05": "esophagus",
80
+ "06": "liver",
81
+ "07": "stomach",
82
+ "08": "aorta",
83
+ "09": "inferior vena cava",
84
+ "10": "portal vein and splenic vein",
85
+ "11": "pancreas",
86
+ "12": "right adrenal gland",
87
+ "13": "left adrenal gland"}
88
+ )
89
+ json_dict['numTraining'] = len(train_patient_names)
90
+ json_dict['numTest'] = len(test_patient_names)
91
+ json_dict['training'] = [{'image': "./imagesTr/%s" % train_patient_name, "label": "./labelsTr/%s" % train_patient_name} for i, train_patient_name in enumerate(train_patient_names)]
92
+ json_dict['test'] = ["./imagesTs/%s" % test_patient_name for test_patient_name in test_patient_names]
93
+
94
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task024_Promise2012.py ADDED
@@ -0,0 +1,81 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from collections import OrderedDict
15
+ import SimpleITK as sitk
16
+ from batchgenerators.utilities.file_and_folder_operations import *
17
+
18
+
19
+ def export_for_submission(source_dir, target_dir):
20
+ """
21
+ promise wants mhd :-/
22
+ :param source_dir:
23
+ :param target_dir:
24
+ :return:
25
+ """
26
+ files = subfiles(source_dir, suffix=".nii.gz", join=False)
27
+ target_files = [join(target_dir, i[:-7] + ".mhd") for i in files]
28
+ maybe_mkdir_p(target_dir)
29
+ for f, t in zip(files, target_files):
30
+ img = sitk.ReadImage(join(source_dir, f))
31
+ sitk.WriteImage(img, t)
32
+
33
+
34
+ if __name__ == "__main__":
35
+ folder = "/media/fabian/My Book/datasets/promise2012"
36
+ out_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task024_Promise"
37
+
38
+ maybe_mkdir_p(join(out_folder, "imagesTr"))
39
+ maybe_mkdir_p(join(out_folder, "imagesTs"))
40
+ maybe_mkdir_p(join(out_folder, "labelsTr"))
41
+ # train
42
+ current_dir = join(folder, "train")
43
+ segmentations = subfiles(current_dir, suffix="segmentation.mhd")
44
+ raw_data = [i for i in subfiles(current_dir, suffix="mhd") if not i.endswith("segmentation.mhd")]
45
+ for i in raw_data:
46
+ out_fname = join(out_folder, "imagesTr", i.split("/")[-1][:-4] + "_0000.nii.gz")
47
+ sitk.WriteImage(sitk.ReadImage(i), out_fname)
48
+ for i in segmentations:
49
+ out_fname = join(out_folder, "labelsTr", i.split("/")[-1][:-17] + ".nii.gz")
50
+ sitk.WriteImage(sitk.ReadImage(i), out_fname)
51
+
52
+ # test
53
+ current_dir = join(folder, "test")
54
+ test_data = subfiles(current_dir, suffix="mhd")
55
+ for i in test_data:
56
+ out_fname = join(out_folder, "imagesTs", i.split("/")[-1][:-4] + "_0000.nii.gz")
57
+ sitk.WriteImage(sitk.ReadImage(i), out_fname)
58
+
59
+
60
+ json_dict = OrderedDict()
61
+ json_dict['name'] = "PROMISE12"
62
+ json_dict['description'] = "prostate"
63
+ json_dict['tensorImageSize'] = "4D"
64
+ json_dict['reference'] = "see challenge website"
65
+ json_dict['licence'] = "see challenge website"
66
+ json_dict['release'] = "0.0"
67
+ json_dict['modality'] = {
68
+ "0": "MRI",
69
+ }
70
+ json_dict['labels'] = {
71
+ "0": "background",
72
+ "1": "prostate"
73
+ }
74
+ json_dict['numTraining'] = len(raw_data)
75
+ json_dict['numTest'] = len(test_data)
76
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1][:-4], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1][:-4]} for i in
77
+ raw_data]
78
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1][:-4] for i in test_data]
79
+
80
+ save_json(json_dict, os.path.join(out_folder, "dataset.json"))
81
+
nnunet/dataset_conversion/Task027_AutomaticCardiacDetectionChallenge.py ADDED
@@ -0,0 +1,106 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections import OrderedDict
16
+ from batchgenerators.utilities.file_and_folder_operations import *
17
+ import shutil
18
+ import numpy as np
19
+ from sklearn.model_selection import KFold
20
+
21
+
22
+ def convert_to_submission(source_dir, target_dir):
23
+ niftis = subfiles(source_dir, join=False, suffix=".nii.gz")
24
+ patientids = np.unique([i[:10] for i in niftis])
25
+ maybe_mkdir_p(target_dir)
26
+ for p in patientids:
27
+ files_of_that_patient = subfiles(source_dir, prefix=p, suffix=".nii.gz", join=False)
28
+ assert len(files_of_that_patient)
29
+ files_of_that_patient.sort()
30
+ # first is ED, second is ES
31
+ shutil.copy(join(source_dir, files_of_that_patient[0]), join(target_dir, p + "_ED.nii.gz"))
32
+ shutil.copy(join(source_dir, files_of_that_patient[1]), join(target_dir, p + "_ES.nii.gz"))
33
+
34
+
35
+ if __name__ == "__main__":
36
+ folder = "/media/fabian/My Book/datasets/ACDC/training"
37
+ folder_test = "/media/fabian/My Book/datasets/ACDC/testing/testing"
38
+ out_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task027_ACDC"
39
+
40
+ maybe_mkdir_p(join(out_folder, "imagesTr"))
41
+ maybe_mkdir_p(join(out_folder, "imagesTs"))
42
+ maybe_mkdir_p(join(out_folder, "labelsTr"))
43
+
44
+ # train
45
+ all_train_files = []
46
+ patient_dirs_train = subfolders(folder, prefix="patient")
47
+ for p in patient_dirs_train:
48
+ current_dir = p
49
+ data_files_train = [i for i in subfiles(current_dir, suffix=".nii.gz") if i.find("_gt") == -1 and i.find("_4d") == -1]
50
+ corresponding_seg_files = [i[:-7] + "_gt.nii.gz" for i in data_files_train]
51
+ for d, s in zip(data_files_train, corresponding_seg_files):
52
+ patient_identifier = d.split("/")[-1][:-7]
53
+ all_train_files.append(patient_identifier + "_0000.nii.gz")
54
+ shutil.copy(d, join(out_folder, "imagesTr", patient_identifier + "_0000.nii.gz"))
55
+ shutil.copy(s, join(out_folder, "labelsTr", patient_identifier + ".nii.gz"))
56
+
57
+ # test
58
+ all_test_files = []
59
+ patient_dirs_test = subfolders(folder_test, prefix="patient")
60
+ for p in patient_dirs_test:
61
+ current_dir = p
62
+ data_files_test = [i for i in subfiles(current_dir, suffix=".nii.gz") if i.find("_gt") == -1 and i.find("_4d") == -1]
63
+ for d in data_files_test:
64
+ patient_identifier = d.split("/")[-1][:-7]
65
+ all_test_files.append(patient_identifier + "_0000.nii.gz")
66
+ shutil.copy(d, join(out_folder, "imagesTs", patient_identifier + "_0000.nii.gz"))
67
+
68
+
69
+ json_dict = OrderedDict()
70
+ json_dict['name'] = "ACDC"
71
+ json_dict['description'] = "cardias cine MRI segmentation"
72
+ json_dict['tensorImageSize'] = "4D"
73
+ json_dict['reference'] = "see ACDC challenge"
74
+ json_dict['licence'] = "see ACDC challenge"
75
+ json_dict['release'] = "0.0"
76
+ json_dict['modality'] = {
77
+ "0": "MRI",
78
+ }
79
+ json_dict['labels'] = {
80
+ "0": "background",
81
+ "1": "RV",
82
+ "2": "MLV",
83
+ "3": "LVC"
84
+ }
85
+ json_dict['numTraining'] = len(all_train_files)
86
+ json_dict['numTest'] = len(all_test_files)
87
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1][:-12], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1][:-12]} for i in
88
+ all_train_files]
89
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1][:-12] for i in all_test_files]
90
+
91
+ save_json(json_dict, os.path.join(out_folder, "dataset.json"))
92
+
93
+ # create a dummy split (patients need to be separated)
94
+ splits = []
95
+ patients = np.unique([i[:10] for i in all_train_files])
96
+ patientids = [i[:-12] for i in all_train_files]
97
+
98
+ kf = KFold(n_splits=5, shuffle=True, random_state=12345)
99
+ for tr, val in kf.split(patients):
100
+ splits.append(OrderedDict())
101
+ tr_patients = patients[tr]
102
+ splits[-1]['train'] = [i[:-12] for i in all_train_files if i[:10] in tr_patients]
103
+ val_patients = patients[val]
104
+ splits[-1]['val'] = [i[:-12] for i in all_train_files if i[:10] in val_patients]
105
+
106
+ save_pickle(splits, "/media/fabian/nnunet/Task027_ACDC/splits_final.pkl")
nnunet/dataset_conversion/Task029_LiverTumorSegmentationChallenge.py ADDED
@@ -0,0 +1,123 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections import OrderedDict
16
+ import SimpleITK as sitk
17
+ from batchgenerators.utilities.file_and_folder_operations import *
18
+ from multiprocessing import Pool
19
+ import numpy as np
20
+ from nnunet.configuration import default_num_threads
21
+ from scipy.ndimage import label
22
+
23
+
24
+ def export_segmentations(indir, outdir):
25
+ niftis = subfiles(indir, suffix='nii.gz', join=False)
26
+ for n in niftis:
27
+ identifier = str(n.split("_")[-1][:-7])
28
+ outfname = join(outdir, "test-segmentation-%s.nii" % identifier)
29
+ img = sitk.ReadImage(join(indir, n))
30
+ sitk.WriteImage(img, outfname)
31
+
32
+
33
+ def export_segmentations_postprocess(indir, outdir):
34
+ maybe_mkdir_p(outdir)
35
+ niftis = subfiles(indir, suffix='nii.gz', join=False)
36
+ for n in niftis:
37
+ print("\n", n)
38
+ identifier = str(n.split("_")[-1][:-7])
39
+ outfname = join(outdir, "test-segmentation-%s.nii" % identifier)
40
+ img = sitk.ReadImage(join(indir, n))
41
+ img_npy = sitk.GetArrayFromImage(img)
42
+ lmap, num_objects = label((img_npy > 0).astype(int))
43
+ sizes = []
44
+ for o in range(1, num_objects + 1):
45
+ sizes.append((lmap == o).sum())
46
+ mx = np.argmax(sizes) + 1
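+ # keep only the largest connected component; component labels start at 1, hence the + 1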
47
+ print(sizes)
48
+ img_npy[lmap != mx] = 0
49
+ img_new = sitk.GetImageFromArray(img_npy)
50
+ img_new.CopyInformation(img)
51
+ sitk.WriteImage(img_new, outfname)
52
+
53
+
54
+ if __name__ == "__main__":
55
+ train_dir = "/media/fabian/DeepLearningData/tmp/LITS-Challenge-Train-Data"
56
+ test_dir = "/media/fabian/My Book/datasets/LiTS/test_data"
57
+
58
+
59
+ output_folder = "/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task029_LITS"
60
+ img_dir = join(output_folder, "imagesTr")
61
+ lab_dir = join(output_folder, "labelsTr")
62
+ img_dir_te = join(output_folder, "imagesTs")
63
+ maybe_mkdir_p(img_dir)
64
+ maybe_mkdir_p(lab_dir)
65
+ maybe_mkdir_p(img_dir_te)
66
+
67
+
68
+ def load_save_train(args):
69
+ data_file, seg_file = args
70
+ pat_id = data_file.split("/")[-1]
71
+ pat_id = "train_" + pat_id.split("-")[-1][:-4]
72
+
73
+ img_itk = sitk.ReadImage(data_file)
74
+ sitk.WriteImage(img_itk, join(img_dir, pat_id + "_0000.nii.gz"))
75
+
76
+ img_itk = sitk.ReadImage(seg_file)
77
+ sitk.WriteImage(img_itk, join(lab_dir, pat_id + ".nii.gz"))
78
+ return pat_id
79
+
80
+ def load_save_test(args):
81
+ data_file = args
82
+ pat_id = data_file.split("/")[-1]
83
+ pat_id = "test_" + pat_id.split("-")[-1][:-4]
84
+
85
+ img_itk = sitk.ReadImage(data_file)
86
+ sitk.WriteImage(img_itk, join(img_dir_te, pat_id + "_0000.nii.gz"))
87
+ return pat_id
88
+
89
+ nii_files_tr_data = subfiles(train_dir, True, "volume", "nii", True)
90
+ nii_files_tr_seg = subfiles(train_dir, True, "segmen", "nii", True)
91
+
92
+ nii_files_ts = subfiles(test_dir, True, "test-volume", "nii", True)
93
+
94
+ p = Pool(default_num_threads)
95
+ train_ids = p.map(load_save_train, zip(nii_files_tr_data, nii_files_tr_seg))
96
+ test_ids = p.map(load_save_test, nii_files_ts)
97
+ p.close()
98
+ p.join()
99
+
100
+ json_dict = OrderedDict()
101
+ json_dict['name'] = "LITS"
102
+ json_dict['description'] = "LITS"
103
+ json_dict['tensorImageSize'] = "4D"
104
+ json_dict['reference'] = "see challenge website"
105
+ json_dict['licence'] = "see challenge website"
106
+ json_dict['release'] = "0.0"
107
+ json_dict['modality'] = {
108
+ "0": "CT"
109
+ }
110
+
111
+ json_dict['labels'] = {
112
+ "0": "background",
113
+ "1": "liver",
114
+ "2": "tumor"
115
+ }
116
+
117
+ json_dict['numTraining'] = len(train_ids)
118
+ json_dict['numTest'] = len(test_ids)
119
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in train_ids]
120
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_ids]
121
+
122
+ with open(os.path.join(output_folder, "dataset.json"), 'w') as f:
123
+ json.dump(json_dict, f, indent=4, sort_keys=True)
nnunet/dataset_conversion/Task032_BraTS_2018.py ADDED
@@ -0,0 +1,176 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from multiprocessing.pool import Pool
15
+
16
+ import numpy as np
17
+ from collections import OrderedDict
18
+
19
+ from batchgenerators.utilities.file_and_folder_operations import *
20
+ from nnunet.dataset_conversion.Task043_BraTS_2019 import copy_BraTS_segmentation_and_convert_labels
21
+ from nnunet.paths import nnUNet_raw_data
22
+ import SimpleITK as sitk
23
+ import shutil
24
+
25
+
26
+ def convert_labels_back_to_BraTS(seg: np.ndarray):
27
+ new_seg = np.zeros_like(seg)
28
+ new_seg[seg == 1] = 2
29
+ new_seg[seg == 3] = 4
30
+ new_seg[seg == 2] = 1
31
+ return new_seg
32
+
33
+
34
+ def load_convert_save(filename, input_folder, output_folder):
35
+ a = sitk.ReadImage(join(input_folder, filename))
36
+ b = sitk.GetArrayFromImage(a)
37
+ c = convert_labels_back_to_BraTS(b)
38
+ d = sitk.GetImageFromArray(c)
39
+ d.CopyInformation(a)
40
+ sitk.WriteImage(d, join(output_folder, filename))
41
+
42
+
43
+ def convert_labels_back_to_BraTS_2018_2019_convention(input_folder: str, output_folder: str, num_processes: int = 12):
44
+ """
45
+ reads all prediction files (nifti) in the input folder, converts the labels back to BraTS convention and saves the
46
+ result in output_folder
47
+ :param input_folder:
48
+ :param output_folder:
49
+ :return:
50
+ """
51
+ maybe_mkdir_p(output_folder)
52
+ nii = subfiles(input_folder, suffix='.nii.gz', join=False)
53
+ p = Pool(num_processes)
54
+ p.starmap(load_convert_save, zip(nii, [input_folder] * len(nii), [output_folder] * len(nii)))
55
+ p.close()
56
+ p.join()
57
+
58
+
59
+ if __name__ == "__main__":
60
+ """
61
+ REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
62
+ """
63
+
64
+ task_name = "Task032_BraTS2018"
65
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Training"
66
+
67
+ target_base = join(nnUNet_raw_data, task_name)
68
+ target_imagesTr = join(target_base, "imagesTr")
69
+ target_imagesVal = join(target_base, "imagesVal")
70
+ target_imagesTs = join(target_base, "imagesTs")
71
+ target_labelsTr = join(target_base, "labelsTr")
72
+
73
+ maybe_mkdir_p(target_imagesTr)
74
+ maybe_mkdir_p(target_imagesVal)
75
+ maybe_mkdir_p(target_imagesTs)
76
+ maybe_mkdir_p(target_labelsTr)
77
+
78
+ patient_names = []
79
+ for tpe in ["HGG", "LGG"]:
80
+ cur = join(downloaded_data_dir, tpe)
81
+ for p in subdirs(cur, join=False):
82
+ patdir = join(cur, p)
83
+ patient_name = tpe + "__" + p
84
+ patient_names.append(patient_name)
85
+ t1 = join(patdir, p + "_t1.nii.gz")
86
+ t1c = join(patdir, p + "_t1ce.nii.gz")
87
+ t2 = join(patdir, p + "_t2.nii.gz")
88
+ flair = join(patdir, p + "_flair.nii.gz")
89
+ seg = join(patdir, p + "_seg.nii.gz")
90
+
91
+ assert all([
92
+ isfile(t1),
93
+ isfile(t1c),
94
+ isfile(t2),
95
+ isfile(flair),
96
+ isfile(seg)
97
+ ]), "%s" % patient_name
98
+
99
+ shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
100
+ shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
101
+ shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
102
+ shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))
103
+
104
+ copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))
105
+
106
+ json_dict = OrderedDict()
107
+ json_dict['name'] = "BraTS2018"
108
+ json_dict['description'] = "nothing"
109
+ json_dict['tensorImageSize'] = "4D"
110
+ json_dict['reference'] = "see BraTS2018"
111
+ json_dict['licence'] = "see BraTS2019 license"
112
+ json_dict['release'] = "0.0"
113
+ json_dict['modality'] = {
114
+ "0": "T1",
115
+ "1": "T1ce",
116
+ "2": "T2",
117
+ "3": "FLAIR"
118
+ }
119
+ json_dict['labels'] = {
120
+ "0": "background",
121
+ "1": "edema",
122
+ "2": "non-enhancing",
123
+ "3": "enhancing",
124
+ }
125
+ json_dict['numTraining'] = len(patient_names)
126
+ json_dict['numTest'] = 0
127
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
128
+ patient_names]
129
+ json_dict['test'] = []
130
+
131
+ save_json(json_dict, join(target_base, "dataset.json"))
132
+
133
+ del tpe, cur
134
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Validation"
135
+
136
+ for p in subdirs(downloaded_data_dir, join=False):
137
+ patdir = join(downloaded_data_dir, p)
138
+ patient_name = p
139
+ t1 = join(patdir, p + "_t1.nii.gz")
140
+ t1c = join(patdir, p + "_t1ce.nii.gz")
141
+ t2 = join(patdir, p + "_t2.nii.gz")
142
+ flair = join(patdir, p + "_flair.nii.gz")
143
+
144
+ assert all([
145
+ isfile(t1),
146
+ isfile(t1c),
147
+ isfile(t2),
148
+ isfile(flair),
149
+ ]), "%s" % patient_name
150
+
151
+ shutil.copy(t1, join(target_imagesVal, patient_name + "_0000.nii.gz"))
152
+ shutil.copy(t1c, join(target_imagesVal, patient_name + "_0001.nii.gz"))
153
+ shutil.copy(t2, join(target_imagesVal, patient_name + "_0002.nii.gz"))
154
+ shutil.copy(flair, join(target_imagesVal, patient_name + "_0003.nii.gz"))
155
+
156
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Testing_FIsensee"
157
+
158
+ for p in subdirs(downloaded_data_dir, join=False):
159
+ patdir = join(downloaded_data_dir, p)
160
+ patient_name = p
161
+ t1 = join(patdir, p + "_t1.nii.gz")
162
+ t1c = join(patdir, p + "_t1ce.nii.gz")
163
+ t2 = join(patdir, p + "_t2.nii.gz")
164
+ flair = join(patdir, p + "_flair.nii.gz")
165
+
166
+ assert all([
167
+ isfile(t1),
168
+ isfile(t1c),
169
+ isfile(t2),
170
+ isfile(flair),
171
+ ]), "%s" % patient_name
172
+
173
+ shutil.copy(t1, join(target_imagesTs, patient_name + "_0000.nii.gz"))
174
+ shutil.copy(t1c, join(target_imagesTs, patient_name + "_0001.nii.gz"))
175
+ shutil.copy(t2, join(target_imagesTs, patient_name + "_0002.nii.gz"))
176
+ shutil.copy(flair, join(target_imagesTs, patient_name + "_0003.nii.gz"))
nnunet/dataset_conversion/Task035_ISBI_MSLesionSegmentationChallenge.py ADDED
@@ -0,0 +1,162 @@
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import shutil
16
+ from collections import OrderedDict
17
+ import numpy as np
18
+ import SimpleITK as sitk
19
+ import multiprocessing
20
+ from batchgenerators.utilities.file_and_folder_operations import *
21
+
22
+
23
+ def convert_to_nii_gz(filename):
24
+ f = sitk.ReadImage(filename)
25
+ sitk.WriteImage(f, os.path.splitext(filename)[0] + ".nii.gz")
26
+ os.remove(filename)
27
+
28
+
29
+ def convert_for_submission(source_dir, target_dir):
30
+ files = subfiles(source_dir, suffix=".nii.gz", join=False)
31
+ maybe_mkdir_p(target_dir)
32
+ for f in files:
33
+ splitted = f.split("__")
34
+ case_id = int(splitted[1])
35
+ timestep = int(splitted[2][:-7])
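+ # prediction names follow the case__<patient>__<timestep>.nii.gz scheme created by rename_files below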
36
+ t = join(target_dir, "test%02d_%02d_nnUNet.nii" % (case_id, timestep))
37
+ img = sitk.ReadImage(join(source_dir, f))
38
+ sitk.WriteImage(img, t)
39
+
40
+
41
+ if __name__ == "__main__":
42
+ # convert to nifti.gz
43
+ dirs = ['/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTr',
44
+ '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTs',
45
+ '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/labelsTr']
46
+
47
+ p = multiprocessing.Pool(3)
48
+
49
+ for d in dirs:
50
+ nii_files = subfiles(d, suffix='.nii')
51
+ p.map(convert_to_nii_gz, nii_files)
52
+
53
+ p.close()
54
+ p.join()
55
+
56
+
57
+ def rename_files(folder):
58
+ all_files = subfiles(folder, join=False)
59
+ # there are max 14 patients per folder, starting with 1
60
+ for patientid in range(1, 15):
61
+ # there are certainly fewer than 10 time steps per patient, starting with 1
62
+ for t in range(1, 10):
63
+ patient_files = [i for i in all_files if i.find("%02.0d_%02.0d_" % (patientid, t)) != -1]
64
+ if not len(patient_files) == 4:
65
+ continue
66
+
67
+ flair_file = [i for i in patient_files if i.endswith("_flair_pp.nii.gz")][0]
68
+ mprage_file = [i for i in patient_files if i.endswith("_mprage_pp.nii.gz")][0]
69
+ pd_file = [i for i in patient_files if i.endswith("_pd_pp.nii.gz")][0]
70
+ t2_file = [i for i in patient_files if i.endswith("_t2_pp.nii.gz")][0]
71
+
72
+ os.rename(join(folder, flair_file), join(folder, "case__%02.0d__%02.0d_0000.nii.gz" % (patientid, t)))
73
+ os.rename(join(folder, mprage_file), join(folder, "case__%02.0d__%02.0d_0001.nii.gz" % (patientid, t)))
74
+ os.rename(join(folder, pd_file), join(folder, "case__%02.0d__%02.0d_0002.nii.gz" % (patientid, t)))
75
+ os.rename(join(folder, t2_file), join(folder, "case__%02.0d__%02.0d_0003.nii.gz" % (patientid, t)))
76
+
77
+
78
+ for d in dirs[:-1]:
79
+ rename_files(d)
80
+
81
+
82
+ # now we have to deal with the training masks, we do it the quick and dirty way here by just creating copies of the
83
+ # training data
84
+
85
+ train_folder = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTr'
86
+
87
+ for patientid in range(1, 6):
88
+ for t in range(1, 6):
89
+ fnames_original = subfiles(train_folder, prefix="case__%02.0d__%02.0d" % (patientid, t), suffix=".nii.gz", sort=True)
90
+ for f in fnames_original:
91
+ for mask in [1, 2]:
92
+ fname_target = f[:-12] + "__mask%d" % mask + f[-12:]
93
+ shutil.copy(f, fname_target)
94
+ os.remove(f)
95
+
96
+
97
+ labels_folder = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/labelsTr'
98
+
99
+ for patientid in range(1, 6):
100
+ for t in range(1, 6):
101
+ for mask in [1, 2]:
102
+ f = join(labels_folder, "training%02d_%02d_mask%d.nii.gz" % (patientid, t, mask))
103
+ if isfile(f):
104
+ os.rename(f, join(labels_folder, "case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask)))
105
+
106
+
107
+
108
+ tr_files = []
109
+ for patientid in range(1, 6):
110
+ for t in range(1, 6):
111
+ for mask in [1, 2]:
112
+ if isfile(join(labels_folder, "case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask))):
113
+ tr_files.append("case__%02.0d__%02.0d__mask%d.nii.gz" % (patientid, t, mask))
114
+
115
+
116
+ ts_files = []
117
+ for patientid in range(1, 20):
118
+ for t in range(1, 20):
119
+ if isfile(join("/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTs",
120
+ "case__%02.0d__%02.0d_0000.nii.gz" % (patientid, t))):
121
+ ts_files.append("case__%02.0d__%02.0d.nii.gz" % (patientid, t))
122
+
123
+
124
+ out_base = '/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/'
125
+
126
+ json_dict = OrderedDict()
127
+ json_dict['name'] = "ISBI_Lesion_Segmentation_Challenge_2015"
128
+ json_dict['description'] = "nothing"
129
+ json_dict['tensorImageSize'] = "4D"
130
+ json_dict['reference'] = "see challenge website"
131
+ json_dict['licence'] = "see challenge website"
132
+ json_dict['release'] = "0.0"
133
+ json_dict['modality'] = {
134
+ "0": "flair",
135
+ "1": "mprage",
136
+ "2": "pd",
137
+ "3": "t2"
138
+ }
139
+ json_dict['labels'] = {
140
+ "0": "background",
141
+ "1": "lesion"
142
+ }
143
+ json_dict['numTraining'] = len(subfiles(labels_folder))
144
+ json_dict['numTest'] = len(subfiles('/media/fabian/My Book/MedicalDecathlon/Task035_ISBILesionSegmentation/imagesTs')) // 4
145
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i[:-7], "label": "./labelsTr/%s.nii.gz" % i[:-7]} for i in
146
+ tr_files]
147
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i[:-7] for i in ts_files]
148
+
149
+ save_json(json_dict, join(out_base, "dataset.json"))
150
+
151
+ case_identifiers = np.unique([i[:-12] for i in subfiles("/media/fabian/My Book/MedicalDecathlon/MedicalDecathlon_raw_splitted/Task035_ISBILesionSegmentation/imagesTr", suffix='.nii.gz', join=False)])
152
+
153
+ splits = []
154
+ for f in range(5):
155
+ cases = [i for i in range(1, 6) if i != f+1]  # note: unused, the split is built from case_identifiers below
156
+ splits.append(OrderedDict())
157
+ splits[-1]['val'] = np.array([i for i in case_identifiers if i.startswith("case__%02d__" % (f + 1))])
158
+ remaining = [i for i in case_identifiers if i not in splits[-1]['val']]
159
+ splits[-1]['train'] = np.array(remaining)
160
+
161
+ maybe_mkdir_p("/media/fabian/nnunet/Task035_ISBILesionSegmentation")
162
+ save_pickle(splits, join("/media/fabian/nnunet/Task035_ISBILesionSegmentation", "splits_final.pkl"))
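+ # For reference (a note, not part of the original script): splits_final.pkl is a list with one
+ # OrderedDict per fold, each holding 'train' and 'val' arrays of case identifiers. When this file
+ # is present in the preprocessed task folder, nnU-Net uses it instead of generating its own random
+ # split; here each fold holds out all timepoints/masks of one patient (patient-wise stratification).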
nnunet/dataset_conversion/Task037_038_Chaos_Challenge.py ADDED
@@ -0,0 +1,460 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from PIL import Image
17
+ import shutil
18
+ from collections import OrderedDict
19
+
20
+ import dicom2nifti
21
+ import numpy as np
22
+ from batchgenerators.utilities.data_splitting import get_split_deterministic
23
+ from batchgenerators.utilities.file_and_folder_operations import *
25
+ import SimpleITK as sitk
26
+ from nnunet.paths import preprocessing_output_dir, nnUNet_raw_data
27
+ from nnunet.utilities.sitk_stuff import copy_geometry
28
+ from nnunet.inference.ensemble_predictions import merge
29
+
30
+
31
+ def load_png_stack(folder):
32
+ pngs = subfiles(folder, suffix="png")
33
+ pngs.sort()
34
+ loaded = []
35
+ for p in pngs:
36
+ loaded.append(np.array(Image.open(p)))
37
+ loaded = np.stack(loaded, 0)[::-1]
38
+ return loaded
39
+
40
+
41
+ def convert_CT_seg(loaded_png):
42
+ return loaded_png.astype(np.uint16)
43
+
44
+
45
+ def convert_MR_seg(loaded_png):
46
+ result = np.zeros(loaded_png.shape)
47
+ result[(loaded_png > 55) & (loaded_png <= 70)] = 1 # liver
48
+ result[(loaded_png > 110) & (loaded_png <= 135)] = 2 # right kidney
49
+ result[(loaded_png > 175) & (loaded_png <= 200)] = 3 # left kidney
50
+ result[(loaded_png > 240) & (loaded_png <= 255)] = 4 # spleen
51
+ return result
52
+
53
+
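+ # Quick sanity check for the intensity-window decoding above (a sketch, not part of the original
+ # script): the CHAOS MR ground truth encodes organs as gray values, convert_MR_seg maps them to
+ # class ids, and the convert_seg_to_intensity_task* functions below invert this for submission:
+ # assert convert_MR_seg(np.array([0, 63, 126, 189, 252])).tolist() == [0.0, 1.0, 2.0, 3.0, 4.0]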
54
+ def convert_seg_to_intensity_task5(seg):
55
+ seg_new = np.zeros(seg.shape, dtype=np.uint8)
56
+ seg_new[seg == 1] = 63
57
+ seg_new[seg == 2] = 126
58
+ seg_new[seg == 3] = 189
59
+ seg_new[seg == 4] = 252
60
+ return seg_new
61
+
62
+
63
+ def convert_seg_to_intensity_task3(seg):
64
+ seg_new = np.zeros(seg.shape, dtype=np.uint8)
65
+ seg_new[seg == 1] = 63
66
+ return seg_new
67
+
68
+
69
+ def write_pngs_from_nifti(nifti, output_folder, converter=convert_seg_to_intensity_task3):
70
+ npy = sitk.GetArrayFromImage(sitk.ReadImage(nifti))
71
+ seg_new = converter(npy)
72
+ for z in range(len(npy)):
73
+ Image.fromarray(seg_new[z]).save(join(output_folder, "img%03.0d.png" % z))
74
+
75
+
76
+ def convert_variant2_predicted_test_to_submission_format(folder_with_predictions,
77
+ output_folder="/home/fabian/drives/datasets/results/nnUNet/test_sets/Task038_CHAOS_Task_3_5_Variant2/ready_to_submit",
78
+ postprocessing_file="/home/fabian/drives/datasets/results/nnUNet/ensembles/Task038_CHAOS_Task_3_5_Variant2/ensemble_2d__nnUNetTrainerV2__nnUNetPlansv2.1--3d_fullres__nnUNetTrainerV2__nnUNetPlansv2.1/postprocessing.json"):
79
+ """
80
+ output_folder is where the extracted template is
81
+ :param folder_with_predictions:
82
+ :param output_folder:
83
+ :return:
84
+ """
85
+ postprocessing_file = "/media/fabian/Results/nnUNet/3d_fullres/Task039_CHAOS_Task_3_5_Variant2_highres/" \
86
+ "nnUNetTrainerV2__nnUNetPlansfixed/postprocessing.json"
87
+
88
+ # variant 2 treats in and out phase as two training examples, so we need to ensemble these two again
89
+ final_predictions_folder = join(output_folder, "final")
90
+ maybe_mkdir_p(final_predictions_folder)
91
+ t1_patient_names = [i.split("_")[-1][:-7] for i in subfiles(folder_with_predictions, prefix="T1", suffix=".nii.gz", join=False)]
92
+ folder_for_ensembing0 = join(output_folder, "ens0")
93
+ folder_for_ensembing1 = join(output_folder, "ens1")
94
+ maybe_mkdir_p(folder_for_ensembing0)
95
+ maybe_mkdir_p(folder_for_ensembing1)
96
+ # now copy all t1 out phases in ens0 and all in phases in ens1. Name them the same.
97
+ for t1 in t1_patient_names:
98
+ shutil.copy(join(folder_with_predictions, "T1_in_%s.npz" % t1), join(folder_for_ensembing1, "T1_%s.npz" % t1))
99
+ shutil.copy(join(folder_with_predictions, "T1_in_%s.pkl" % t1), join(folder_for_ensembing1, "T1_%s.pkl" % t1))
100
+ shutil.copy(join(folder_with_predictions, "T1_out_%s.npz" % t1), join(folder_for_ensembing0, "T1_%s.npz" % t1))
101
+ shutil.copy(join(folder_with_predictions, "T1_out_%s.pkl" % t1), join(folder_for_ensembing0, "T1_%s.pkl" % t1))
102
+ shutil.copy(join(folder_with_predictions, "plans.pkl"), join(folder_for_ensembing0, "plans.pkl"))
103
+ shutil.copy(join(folder_with_predictions, "plans.pkl"), join(folder_for_ensembing1, "plans.pkl"))
104
+
105
+ # there is a problem with T1_35 that I need to correct manually (different crop size, will not negatively impact results)
106
+ #ens0_softmax = np.load(join(folder_for_ensembing0, "T1_35.npz"))['softmax']
107
+ ens1_softmax = np.load(join(folder_for_ensembing1, "T1_35.npz"))['softmax']
108
+ #ens0_props = load_pickle(join(folder_for_ensembing0, "T1_35.pkl"))
109
+ #ens1_props = load_pickle(join(folder_for_ensembing1, "T1_35.pkl"))
110
+ ens1_softmax = ens1_softmax[:, :, :-1, :]
111
+ np.savez_compressed(join(folder_for_ensembing1, "T1_35.npz"), softmax=ens1_softmax)
112
+ shutil.copy(join(folder_for_ensembing0, "T1_35.pkl"), join(folder_for_ensembing1, "T1_35.pkl"))
113
+
114
+ # now call my ensemble function
115
+ merge((folder_for_ensembing0, folder_for_ensembing1), final_predictions_folder, 8, True,
116
+ postprocessing_file=postprocessing_file)
117
+ # copy t2 files to final_predictions_folder as well
118
+ t2_files = subfiles(folder_with_predictions, prefix="T2", suffix=".nii.gz", join=False)
119
+ for t2 in t2_files:
120
+ shutil.copy(join(folder_with_predictions, t2), join(final_predictions_folder, t2))
121
+
122
+ # apply postprocessing
123
+ from nnunet.postprocessing.connected_components import apply_postprocessing_to_folder, load_postprocessing
124
+ postprocessed_folder = join(output_folder, "final_postprocessed")
125
+ for_which_classes, min_valid_obj_size = load_postprocessing(postprocessing_file)
126
+ apply_postprocessing_to_folder(final_predictions_folder, postprocessed_folder,
127
+ for_which_classes, min_valid_obj_size, 8)
128
+
129
+ # now export the niftis in the weird png format
130
+ # task 3
131
+ output_dir = join(output_folder, "CHAOS_submission_template_new", "Task3", "MR")
132
+ for t1 in t1_patient_names:
133
+ output_folder_here = join(output_dir, t1, "T1DUAL", "Results")
134
+ nifti_file = join(postprocessed_folder, "T1_%s.nii.gz" % t1)
135
+ write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task3)
136
+ for t2 in t2_files:
137
+ patname = t2.split("_")[-1][:-7]
138
+ output_folder_here = join(output_dir, patname, "T2SPIR", "Results")
139
+ nifti_file = join(postprocessed_folder, "T2_%s.nii.gz" % patname)
140
+ write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task3)
141
+
142
+ # task 5
143
+ output_dir = join(output_folder, "CHAOS_submission_template_new", "Task5", "MR")
144
+ for t1 in t1_patient_names:
145
+ output_folder_here = join(output_dir, t1, "T1DUAL", "Results")
146
+ nifti_file = join(postprocessed_folder, "T1_%s.nii.gz" % t1)
147
+ write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task5)
148
+ for t2 in t2_files:
149
+ patname = t2.split("_")[-1][:-7]
150
+ output_folder_here = join(output_dir, patname, "T2SPIR", "Results")
151
+ nifti_file = join(postprocessed_folder, "T2_%s.nii.gz" % patname)
152
+ write_pngs_from_nifti(nifti_file, output_folder_here, converter=convert_seg_to_intensity_task5)
153
+
154
+
155
+
156
+ if __name__ == "__main__":
157
+ """
158
+ This script only prepares data to participate in Task 3 and Task 5. I don't like the CT task because
159
+ 1) there are
160
+ no abdominal organs in the ground truth. In the case of CT we are supposed to train only liver while on MRI we are
161
+ supposed to train all organs. This would require manual modification of nnU-net to deal with this dataset. This is
162
+ not what nnU-net is about.
163
+ 2) CT Liver or multiorgan segmentation is too easy to get external data for. Therefore the challenge comes down
164
+ to who gets the best external data, not who has the best algorithm. Not super interesting.
165
+
166
+ Task 3 is a subtask of Task 5 so we need to prepare the data only once.
167
+ Difficulty: We need to process both T1 and T2, but T1 has 2 'modalities' (phases). nnU-Net cannot handle a varying
168
+ number of input channels. We need to be creative.
169
+ We deal with this by preparing 2 Variants:
170
+ 1) pretend we have 2 modalities for T2 as well by simply stacking a copy of the data
171
+ 2) treat all MRI sequences independently, so we now have 3*20 training data instead of 2*20. In inference we then
172
+ ensemble the results for the two t1 modalities.
173
+
174
+ Careful: We need to split manually here to ensure we stratify by patient
175
+ """
176
+
177
+ root = "/media/fabian/My Book/datasets/CHAOS_challenge/Train_Sets"
178
+ root_test = "/media/fabian/My Book/datasets/CHAOS_challenge/Test_Sets"
179
+ out_base = nnUNet_raw_data
180
+ # CT
181
+ # we ignore CT because
182
+
183
+ ##############################################################
184
+ # Variant 1
185
+ ##############################################################
186
+ patient_ids = []
187
+ patient_ids_test = []
188
+
189
+ output_folder = join(out_base, "Task037_CHAOS_Task_3_5_Variant1")
190
+ output_images = join(output_folder, "imagesTr")
191
+ output_labels = join(output_folder, "labelsTr")
192
+ output_imagesTs = join(output_folder, "imagesTs")
193
+ maybe_mkdir_p(output_images)
194
+ maybe_mkdir_p(output_labels)
195
+ maybe_mkdir_p(output_imagesTs)
196
+
197
+
198
+ # Process T1 train
199
+ d = join(root, "MR")
200
+ patients = subdirs(d, join=False)
201
+ for p in patients:
202
+ patient_name = "T1_" + p
203
+ gt_dir = join(d, p, "T1DUAL", "Ground")
204
+ seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])
205
+
206
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
207
+ img_outfile = join(output_images, patient_name + "_0000.nii.gz")
208
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
209
+
210
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
211
+ img_outfile = join(output_images, patient_name + "_0001.nii.gz")
212
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
213
+
214
+ img_sitk = sitk.ReadImage(img_outfile)
215
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
216
+ seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
217
+ seg_itk = copy_geometry(seg_itk, img_sitk)
218
+ sitk.WriteImage(seg_itk, join(output_labels, patient_name + ".nii.gz"))
219
+ patient_ids.append(patient_name)
220
+
221
+ # Process T1 test
222
+ d = join(root_test, "MR")
223
+ patients = subdirs(d, join=False)
224
+ for p in patients:
225
+ patient_name = "T1_" + p
226
+
227
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
228
+ img_outfile = join(output_imagesTs, patient_name + "_0000.nii.gz")
229
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
230
+
231
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
232
+ img_outfile = join(output_imagesTs, patient_name + "_0001.nii.gz")
233
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
234
+
235
+ img_sitk = sitk.ReadImage(img_outfile)
236
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
237
+ patient_ids_test.append(patient_name)
238
+
239
+ # Process T2 train
240
+ d = join(root, "MR")
241
+ patients = subdirs(d, join=False)
242
+ for p in patients:
243
+ patient_name = "T2_" + p
244
+
245
+ gt_dir = join(d, p, "T2SPIR", "Ground")
246
+ seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])
247
+
248
+ img_dir = join(d, p, "T2SPIR", "DICOM_anon")
249
+ img_outfile = join(output_images, patient_name + "_0000.nii.gz")
250
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
251
+ shutil.copy(join(output_images, patient_name + "_0000.nii.gz"), join(output_images, patient_name + "_0001.nii.gz"))
252
+
253
+ img_sitk = sitk.ReadImage(img_outfile)
254
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
255
+ seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
256
+ seg_itk = copy_geometry(seg_itk, img_sitk)
257
+ sitk.WriteImage(seg_itk, join(output_labels, patient_name + ".nii.gz"))
258
+ patient_ids.append(patient_name)
259
+
260
+ # Process T2 test
261
+ d = join(root_test, "MR")
262
+ patients = subdirs(d, join=False)
263
+ for p in patients:
264
+ patient_name = "T2_" + p
265
+
266
+ gt_dir = join(d, p, "T2SPIR", "Ground")
267
+
268
+ img_dir = join(d, p, "T2SPIR", "DICOM_anon")
269
+ img_outfile = join(output_imagesTs, patient_name + "_0000.nii.gz")
270
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
271
+ shutil.copy(join(output_imagesTs, patient_name + "_0000.nii.gz"), join(output_imagesTs, patient_name + "_0001.nii.gz"))
272
+
273
+ img_sitk = sitk.ReadImage(img_outfile)
274
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
275
+ patient_ids_test.append(patient_name)
276
+
277
+ json_dict = OrderedDict()
278
+ json_dict['name'] = "Chaos Challenge Task3/5 Variant 1"
279
+ json_dict['description'] = "nothing"
280
+ json_dict['tensorImageSize'] = "4D"
281
+ json_dict['reference'] = "https://chaos.grand-challenge.org/Data/"
282
+ json_dict['licence'] = "see https://chaos.grand-challenge.org/Data/"
283
+ json_dict['release'] = "0.0"
284
+ json_dict['modality'] = {
285
+ "0": "MRI",
286
+ "1": "MRI",
287
+ }
288
+ json_dict['labels'] = {
289
+ "0": "background",
290
+ "1": "liver",
291
+ "2": "right kidney",
292
+ "3": "left kidney",
293
+ "4": "spleen",
294
+ }
295
+ json_dict['numTraining'] = len(patient_ids)
296
+ json_dict['numTest'] = 0
297
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
298
+ patient_ids]
299
+ json_dict['test'] = []
300
+
301
+ save_json(json_dict, join(output_folder, "dataset.json"))
302
+
303
+ ##############################################################
304
+ # Variant 2
305
+ ##############################################################
306
+
307
+ patient_ids = []
308
+ patient_ids_test = []
309
+
310
+ output_folder = join(out_base, "Task038_CHAOS_Task_3_5_Variant2")
311
+ output_images = join(output_folder, "imagesTr")
312
+ output_imagesTs = join(output_folder, "imagesTs")
313
+ output_labels = join(output_folder, "labelsTr")
314
+ maybe_mkdir_p(output_images)
315
+ maybe_mkdir_p(output_imagesTs)
316
+ maybe_mkdir_p(output_labels)
317
+
318
+ # Process T1 train
319
+ d = join(root, "MR")
320
+ patients = subdirs(d, join=False)
321
+ for p in patients:
322
+ patient_name_in = "T1_in_" + p
323
+ patient_name_out = "T1_out_" + p
324
+ gt_dir = join(d, p, "T1DUAL", "Ground")
325
+ seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])
326
+
327
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
328
+ img_outfile = join(output_images, patient_name_in + "_0000.nii.gz")
329
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
330
+
331
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
332
+ img_outfile = join(output_images, patient_name_out + "_0000.nii.gz")
333
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
334
+
335
+ img_sitk = sitk.ReadImage(img_outfile)
336
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
337
+ seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
338
+ seg_itk = copy_geometry(seg_itk, img_sitk)
339
+ sitk.WriteImage(seg_itk, join(output_labels, patient_name_in + ".nii.gz"))
340
+ sitk.WriteImage(seg_itk, join(output_labels, patient_name_out + ".nii.gz"))
341
+ patient_ids.append(patient_name_out)
342
+ patient_ids.append(patient_name_in)
343
+
344
+ # Process T1 test
345
+ d = join(root_test, "MR")
346
+ patients = subdirs(d, join=False)
347
+ for p in patients:
348
+ patient_name_in = "T1_in_" + p
349
+ patient_name_out = "T1_out_" + p
350
+ gt_dir = join(d, p, "T1DUAL", "Ground")
351
+
352
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "InPhase")
353
+ img_outfile = join(output_imagesTs, patient_name_in + "_0000.nii.gz")
354
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
355
+
356
+ img_dir = join(d, p, "T1DUAL", "DICOM_anon", "OutPhase")
357
+ img_outfile = join(output_imagesTs, patient_name_out + "_0000.nii.gz")
358
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
359
+
360
+ img_sitk = sitk.ReadImage(img_outfile)
361
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
362
+ patient_ids_test.append(patient_name_out)
363
+ patient_ids_test.append(patient_name_in)
364
+
365
+ # Process T2 train
366
+ d = join(root, "MR")
367
+ patients = subdirs(d, join=False)
368
+ for p in patients:
369
+ patient_name = "T2_" + p
370
+
371
+ gt_dir = join(d, p, "T2SPIR", "Ground")
372
+ seg = convert_MR_seg(load_png_stack(gt_dir)[::-1])
373
+
374
+ img_dir = join(d, p, "T2SPIR", "DICOM_anon")
375
+ img_outfile = join(output_images, patient_name + "_0000.nii.gz")
376
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
377
+
378
+ img_sitk = sitk.ReadImage(img_outfile)
379
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
380
+ seg_itk = sitk.GetImageFromArray(seg.astype(np.uint8))
381
+ seg_itk = copy_geometry(seg_itk, img_sitk)
382
+ sitk.WriteImage(seg_itk, join(output_labels, patient_name + ".nii.gz"))
383
+ patient_ids.append(patient_name)
384
+
385
+ # Process T2 test
386
+ d = join(root_test, "MR")
387
+ patients = subdirs(d, join=False)
388
+ for p in patients:
389
+ patient_name = "T2_" + p
390
+
391
+ gt_dir = join(d, p, "T2SPIR", "Ground")
392
+
393
+ img_dir = join(d, p, "T2SPIR", "DICOM_anon")
394
+ img_outfile = join(output_imagesTs, patient_name + "_0000.nii.gz")
395
+ _ = dicom2nifti.convert_dicom.dicom_series_to_nifti(img_dir, img_outfile, reorient_nifti=False)
396
+
397
+ img_sitk = sitk.ReadImage(img_outfile)
398
+ img_sitk_npy = sitk.GetArrayFromImage(img_sitk)
399
+ patient_ids_test.append(patient_name)
400
+
401
+ json_dict = OrderedDict()
402
+ json_dict['name'] = "Chaos Challenge Task3/5 Variant 2"
403
+ json_dict['description'] = "nothing"
404
+ json_dict['tensorImageSize'] = "4D"
405
+ json_dict['reference'] = "https://chaos.grand-challenge.org/Data/"
406
+ json_dict['licence'] = "see https://chaos.grand-challenge.org/Data/"
407
+ json_dict['release'] = "0.0"
408
+ json_dict['modality'] = {
409
+ "0": "MRI",
410
+ }
411
+ json_dict['labels'] = {
412
+ "0": "background",
413
+ "1": "liver",
414
+ "2": "right kidney",
415
+ "3": "left kidney",
416
+ "4": "spleen",
417
+ }
418
+ json_dict['numTraining'] = len(patient_ids)
419
+ json_dict['numTest'] = 0
420
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
421
+ patient_ids]
422
+ json_dict['test'] = []
423
+
424
+ save_json(json_dict, join(output_folder, "dataset.json"))
425
+
426
+ #################################################
427
+ # custom split
428
+ #################################################
429
+ patients = subdirs(join(root, "MR"), join=False)
430
+ task_name_variant1 = "Task037_CHAOS_Task_3_5_Variant1"
431
+ task_name_variant2 = "Task038_CHAOS_Task_3_5_Variant2"
432
+
433
+ output_preprocessed_v1 = join(preprocessing_output_dir, task_name_variant1)
434
+ maybe_mkdir_p(output_preprocessed_v1)
435
+
436
+ output_preprocessed_v2 = join(preprocessing_output_dir, task_name_variant2)
437
+ maybe_mkdir_p(output_preprocessed_v2)
438
+
439
+ splits = []
440
+ for fold in range(5):
441
+ tr, val = get_split_deterministic(patients, fold, 5, 12345)
442
+ train = ["T2_" + i for i in tr] + ["T1_" + i for i in tr]
443
+ validation = ["T2_" + i for i in val] + ["T1_" + i for i in val]
444
+ splits.append({
445
+ 'train': train,
446
+ 'val': validation
447
+ })
448
+ save_pickle(splits, join(output_preprocessed_v1, "splits_final.pkl"))
449
+
450
+ splits = []
451
+ for fold in range(5):
452
+ tr, val = get_split_deterministic(patients, fold, 5, 12345)
453
+ train = ["T2_" + i for i in tr] + ["T1_in_" + i for i in tr] + ["T1_out_" + i for i in tr]
454
+ validation = ["T2_" + i for i in val] + ["T1_in_" + i for i in val] + ["T1_out_" + i for i in val]
455
+ splits.append({
456
+ 'train': train,
457
+ 'val': validation
458
+ })
459
+ save_pickle(splits, join(output_preprocessed_v2, "splits_final.pkl"))
460
+
nnunet/dataset_conversion/Task040_KiTS.py ADDED
@@ -0,0 +1,240 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from copy import deepcopy
17
+
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+ import SimpleITK as sitk
21
+ from multiprocessing import Pool
22
+ from medpy.metric import dc
23
+ import numpy as np
24
+ from nnunet.paths import network_training_output_dir
25
+ from scipy.ndimage import label
26
+
27
+
28
+ def compute_dice_scores(ref: str, pred: str):
29
+ ref = sitk.GetArrayFromImage(sitk.ReadImage(ref))
30
+ pred = sitk.GetArrayFromImage(sitk.ReadImage(pred))
31
+ kidney_mask_ref = ref > 0
32
+ kidney_mask_pred = pred > 0
33
+ if np.sum(kidney_mask_pred) == 0 and kidney_mask_ref.sum() == 0:
34
+ kidney_dice = np.nan
35
+ else:
36
+ kidney_dice = dc(kidney_mask_pred, kidney_mask_ref)
37
+
38
+ tumor_mask_ref = ref == 2
39
+ tumor_mask_pred = pred == 2
40
+ if np.sum(tumor_mask_ref) == 0 and tumor_mask_pred.sum() == 0:
41
+ tumor_dice = np.nan
42
+ else:
43
+ tumor_dice = dc(tumor_mask_ref, tumor_mask_pred)
44
+
45
+ geometric_mean = np.mean((kidney_dice, tumor_dice))  # note: despite the name, np.mean is the arithmetic mean
46
+ return kidney_dice, tumor_dice, geometric_mean
47
+
48
+
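+ # For reference (a sketch, not part of the original file): medpy.metric.dc computes the
+ # Soerensen-Dice coefficient DSC = 2 * |A n B| / (|A| + |B|). Example with two half-overlapping
+ # 8-voxel masks:
+ # a = np.zeros((4, 4), bool); a[:2] = True
+ # b = np.zeros((4, 4), bool); b[1:3] = True
+ # dc(a, b) == 2 * 4 / (8 + 8) == 0.5
+ # The np.nan above marks structures that are absent from both reference and prediction, so no
+ # meaningless Dice of 0 is recorded for them.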
49
+ def evaluate_folder(folder_gt: str, folder_pred: str):
50
+ p = Pool(8)
51
+ niftis = subfiles(folder_gt, suffix=".nii.gz", join=False)
52
+ images_gt = [join(folder_gt, i) for i in niftis]
53
+ images_pred = [join(folder_pred, i) for i in niftis]
54
+ results = p.starmap(compute_dice_scores, zip(images_gt, images_pred))
55
+ p.close()
56
+ p.join()
57
+
58
+ with open(join(folder_pred, "results.csv"), 'w') as f:
59
+ for i, ni in enumerate(niftis):
60
+ f.write("%s,%0.4f,%0.4f,%0.4f\n" % (ni, *results[i]))
61
+
62
+
63
+ def remove_all_but_the_two_largest_conn_comp(img_itk_file: str, file_out: str):
64
+ """
65
+ This was not used. I was just curious because others used this. Turns out this is not necessary for my networks
66
+ """
67
+ img_itk = sitk.ReadImage(img_itk_file)
68
+ img_npy = sitk.GetArrayFromImage(img_itk)
69
+
70
+ labelmap, num_labels = label((img_npy > 0).astype(int))
71
+
72
+ if num_labels > 2:
73
+ label_sizes = []
74
+ for i in range(1, num_labels + 1):
75
+ label_sizes.append(np.sum(labelmap == i))
76
+ argsrt = np.argsort(label_sizes)[::-1] # two largest are now argsrt[0] and argsrt[1]
77
+ keep_mask = (labelmap == argsrt[0] + 1) | (labelmap == argsrt[1] + 1)
78
+ img_npy[~keep_mask] = 0
79
+ new = sitk.GetImageFromArray(img_npy)
80
+ new.CopyInformation(img_itk)
81
+ sitk.WriteImage(new, file_out)
82
+ print(os.path.basename(img_itk_file), num_labels, label_sizes)
83
+ else:
84
+ shutil.copy(img_itk_file, file_out)
85
+
86
+
87
+ def manual_postprocess(folder_in,
88
+ folder_out):
89
+ """
90
+ This was not used. I was just curious because others used this. Turns out this is not necessary for my networks
91
+ """
92
+ maybe_mkdir_p(folder_out)
93
+ infiles = subfiles(folder_in, suffix=".nii.gz", join=False)
94
+
95
+ outfiles = [join(folder_out, i) for i in infiles]
96
+ infiles = [join(folder_in, i) for i in infiles]
97
+
98
+ p = Pool(8)
99
+ _ = p.starmap_async(remove_all_but_the_two_largest_conn_comp, zip(infiles, outfiles))
100
+ _ = _.get()
101
+ p.close()
102
+ p.join()
103
+
104
+
105
+
106
+
107
+ def copy_npz_fom_valsets():
108
+ '''
109
+ this is preparation for ensembling
110
+ :return:
111
+ '''
112
+ base = join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean")
113
+ folders = ['nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
114
+ 'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans',
115
+ 'nnUNetTrainerNewCandidate23__nnUNetPlans']
116
+ for f in folders:
117
+ out = join(base, f, 'crossval_npz')
118
+ maybe_mkdir_p(out)
119
+ shutil.copy(join(base, f, 'plans.pkl'), out)
120
+ for fold in range(5):
121
+ cur = join(base, f, 'fold_%d' % fold, 'validation_raw')
122
+ npz_files = subfiles(cur, suffix='.npz', join=False)
123
+ pkl_files = [i[:-3] + 'pkl' for i in npz_files]
124
+ assert all([isfile(join(cur, i)) for i in pkl_files])
125
+ for n in npz_files:
126
+ corresponding_pkl = n[:-3] + 'pkl'
127
+ shutil.copy(join(cur, n), out)
128
+ shutil.copy(join(cur, corresponding_pkl), out)
129
+
130
+
131
+ def ensemble(experiments=('nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
132
+ 'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans'), out_dir="/media/fabian/Results/nnUNet/3d_lowres/Task048_KiTS_clean/ensemble_preactres_and_res"):
133
+ from nnunet.inference.ensemble_predictions import merge
134
+ folders = [join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean", i, 'crossval_npz') for i in experiments]
135
+ merge(folders, out_dir, 8)
136
+
137
+
138
+ def prepare_submission(fld= "/home/fabian/drives/datasets/results/nnUNet/test_sets/Task048_KiTS_clean/predicted_ens_3d_fullres_3d_cascade_fullres_postprocessed", # '/home/fabian/datasets_fabian/predicted_KiTS_nnUNetTrainerNewCandidate23_FabiansResNet',
139
+ out='/home/fabian/drives/datasets/results/nnUNet/test_sets/Task048_KiTS_clean/submission'):
140
+ nii = subfiles(fld, join=False, suffix='.nii.gz')
141
+ maybe_mkdir_p(out)
142
+ for n in nii:
143
+ outfname = n.replace('case', 'prediction')
144
+ shutil.copy(join(fld, n), join(out, outfname))
145
+
146
+
147
+ def pretent_to_be_nnUNetTrainer(base, folds=(0, 1, 2, 3, 4)):
148
+ """
149
+ changes best checkpoint pickle nnunettrainer class name to nnUNetTrainer
150
+ :param experiments:
151
+ :return:
152
+ """
153
+ for fold in folds:
154
+ cur = join(base, "fold_%d" % fold)
155
+ pkl_file = join(cur, 'model_best.model.pkl')
156
+ a = load_pickle(pkl_file)
157
+ a['name_old'] = deepcopy(a['name'])
158
+ a['name'] = 'nnUNetTrainer'
159
+ save_pickle(a, pkl_file)
160
+
161
+
162
+ def reset_trainerName(base, folds=(0, 1, 2, 3, 4)):
163
+ for fold in folds:
164
+ cur = join(base, "fold_%d" % fold)
165
+ pkl_file = join(cur, 'model_best.model.pkl')
166
+ a = load_pickle(pkl_file)
167
+ a['name'] = a['name_old']
168
+ del a['name_old']
169
+ save_pickle(a, pkl_file)
170
+
171
+
172
+ def nnUNetTrainer_these(experiments=('nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
173
+ 'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans',
174
+ 'nnUNetTrainerNewCandidate23__nnUNetPlans')):
175
+ """
176
+ changes best checkpoint pickle nnunettrainer class name to nnUNetTrainer
177
+ :param experiments:
178
+ :return:
179
+ """
180
+ base = join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean")
181
+ for exp in experiments:
182
+ cur = join(base, exp)
183
+ pretent_to_be_nnUNetTrainer(cur)
184
+
185
+
186
+ def reset_trainerName_these(experiments=('nnUNetTrainerNewCandidate23_FabiansPreActResNet__nnUNetPlans',
187
+ 'nnUNetTrainerNewCandidate23_FabiansResNet__nnUNetPlans',
188
+ 'nnUNetTrainerNewCandidate23__nnUNetPlans')):
189
+ """
190
+ restores the original trainer class name in the best checkpoint pickles (undoes pretent_to_be_nnUNetTrainer)
191
+ :param experiments:
192
+ :return:
193
+ """
194
+ base = join(network_training_output_dir, "3d_lowres/Task048_KiTS_clean")
195
+ for exp in experiments:
196
+ cur = join(base, exp)
197
+ reset_trainerName(cur)
198
+
199
+
200
+ if __name__ == "__main__":
201
+ base = "/media/fabian/My Book/datasets/KiTS2019_Challenge/kits19/data"
202
+ out = "/media/fabian/My Book/MedicalDecathlon/nnUNet_raw_splitted/Task040_KiTS"
203
+ cases = subdirs(base, join=False)
204
+
205
+ maybe_mkdir_p(out)
206
+ maybe_mkdir_p(join(out, "imagesTr"))
207
+ maybe_mkdir_p(join(out, "imagesTs"))
208
+ maybe_mkdir_p(join(out, "labelsTr"))
209
+
210
+ for c in cases:
211
+ case_id = int(c.split("_")[-1])
212
+ if case_id < 210:
213
+ shutil.copy(join(base, c, "imaging.nii.gz"), join(out, "imagesTr", c + "_0000.nii.gz"))
214
+ shutil.copy(join(base, c, "segmentation.nii.gz"), join(out, "labelsTr", c + ".nii.gz"))
215
+ else:
216
+ shutil.copy(join(base, c, "imaging.nii.gz"), join(out, "imagesTs", c + "_0000.nii.gz"))
217
+
218
+ json_dict = {}
219
+ json_dict['name'] = "KiTS"
220
+ json_dict['description'] = "kidney and kidney tumor segmentation"
221
+ json_dict['tensorImageSize'] = "4D"
222
+ json_dict['reference'] = "KiTS data for nnunet"
223
+ json_dict['licence'] = ""
224
+ json_dict['release'] = "0.0"
225
+ json_dict['modality'] = {
226
+ "0": "CT",
227
+ }
228
+ json_dict['labels'] = {
229
+ "0": "background",
230
+ "1": "Kidney",
231
+ "2": "Tumor"
232
+ }
233
+ json_dict['numTraining'] = len(train_cases)
234
+ json_dict['numTest'] = 0
235
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
236
+ train_cases]
237
+ json_dict['test'] = []
238
+
239
+ save_json(json_dict, os.path.join(out, "dataset.json"))
240
+
nnunet/dataset_conversion/Task043_BraTS_2019.py ADDED
@@ -0,0 +1,164 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import numpy as np
17
+ from collections import OrderedDict
18
+
19
+ from batchgenerators.utilities.file_and_folder_operations import *
20
+ from nnunet.paths import nnUNet_raw_data
21
+ import SimpleITK as sitk
22
+ import shutil
23
+
24
+
25
+ def copy_BraTS_segmentation_and_convert_labels(in_file, out_file):
26
+ # use this for segmentation only!!!
27
+ # nnUNet wants the labels to be continuous. BraTS is 0, 1, 2, 4 -> we make that into 0, 1, 2, 3
28
+ img = sitk.ReadImage(in_file)
29
+ img_npy = sitk.GetArrayFromImage(img)
30
+
31
+ uniques = np.unique(img_npy)
32
+ for u in uniques:
33
+ if u not in [0, 1, 2, 4]:
34
+ raise RuntimeError('unexpected label')
35
+
36
+ seg_new = np.zeros_like(img_npy)
37
+ seg_new[img_npy == 4] = 3
38
+ seg_new[img_npy == 2] = 1
39
+ seg_new[img_npy == 1] = 2
40
+ img_corr = sitk.GetImageFromArray(seg_new)
41
+ img_corr.CopyInformation(img)
42
+ sitk.WriteImage(img_corr, out_file)
43
+
44
+
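+ # The main block below reminds us to convert labels back to the BraTS convention after prediction.
+ # A minimal sketch of that inverse mapping (a hypothetical helper, not part of the original file),
+ # derived from the forward mapping above (BraTS 4 -> 3, 2 -> 1, 1 -> 2):
+ def convert_labels_back_to_BraTS(in_file, out_file):
+ img = sitk.ReadImage(in_file)
+ img_npy = sitk.GetArrayFromImage(img)
+ seg_new = np.zeros_like(img_npy)
+ seg_new[img_npy == 3] = 4 # enhancing tumor
+ seg_new[img_npy == 2] = 1 # non-enhancing / necrosis
+ seg_new[img_npy == 1] = 2 # edema
+ img_corr = sitk.GetImageFromArray(seg_new)
+ img_corr.CopyInformation(img)
+ sitk.WriteImage(img_corr, out_file)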
45
+ if __name__ == "__main__":
46
+ """
47
+ REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
48
+ """
49
+
50
+ task_name = "Task043_BraTS2019"
51
+ downloaded_data_dir = "/home/sdp/MLPERF/Brats2019_DATA/MICCAI_BraTS_2019_Data_Training"
52
+
53
+ target_base = join(nnUNet_raw_data, task_name)
54
+ target_imagesTr = join(target_base, "imagesTr")
55
+ target_imagesVal = join(target_base, "imagesVal")
56
+ target_imagesTs = join(target_base, "imagesTs")
57
+ target_labelsTr = join(target_base, "labelsTr")
58
+
59
+ maybe_mkdir_p(target_imagesTr)
60
+ maybe_mkdir_p(target_imagesVal)
61
+ maybe_mkdir_p(target_imagesTs)
62
+ maybe_mkdir_p(target_labelsTr)
63
+
64
+ patient_names = []
65
+ for tpe in ["HGG", "LGG"]:
66
+ cur = join(downloaded_data_dir, tpe)
67
+ for p in subdirs(cur, join=False):
68
+ patdir = join(cur, p)
69
+ patient_name = tpe + "__" + p
70
+ patient_names.append(patient_name)
71
+ t1 = join(patdir, p + "_t1.nii.gz")
72
+ t1c = join(patdir, p + "_t1ce.nii.gz")
73
+ t2 = join(patdir, p + "_t2.nii.gz")
74
+ flair = join(patdir, p + "_flair.nii.gz")
75
+ seg = join(patdir, p + "_seg.nii.gz")
76
+
77
+ assert all([
78
+ isfile(t1),
79
+ isfile(t1c),
80
+ isfile(t2),
81
+ isfile(flair),
82
+ isfile(seg)
83
+ ]), "%s" % patient_name
84
+
85
+ shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
86
+ shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
87
+ shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
88
+ shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))
89
+
90
+ copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))
91
+
92
+
93
+ json_dict = OrderedDict()
94
+ json_dict['name'] = "BraTS2019"
95
+ json_dict['description'] = "nothing"
96
+ json_dict['tensorImageSize'] = "4D"
97
+ json_dict['reference'] = "see BraTS2019"
98
+ json_dict['licence'] = "see BraTS2019 license"
99
+ json_dict['release'] = "0.0"
100
+ json_dict['modality'] = {
101
+ "0": "T1",
102
+ "1": "T1ce",
103
+ "2": "T2",
104
+ "3": "FLAIR"
105
+ }
106
+ json_dict['labels'] = {
107
+ "0": "background",
108
+ "1": "edema",
109
+ "2": "non-enhancing",
110
+ "3": "enhancing",
111
+ }
112
+ json_dict['numTraining'] = len(patient_names)
113
+ json_dict['numTest'] = 0
114
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
115
+ patient_names]
116
+ json_dict['test'] = []
117
+
118
+ save_json(json_dict, join(target_base, "dataset.json"))
119
+
120
+ downloaded_data_dir = "/home/sdp/MLPERF/Brats2019_DATA/MICCAI_BraTS_2019_Data_Validation"
121
+
122
+ for p in subdirs(downloaded_data_dir, join=False):
123
+ patdir = join(downloaded_data_dir, p)
124
+ patient_name = p
125
+ t1 = join(patdir, p + "_t1.nii.gz")
126
+ t1c = join(patdir, p + "_t1ce.nii.gz")
127
+ t2 = join(patdir, p + "_t2.nii.gz")
128
+ flair = join(patdir, p + "_flair.nii.gz")
129
+
130
+ assert all([
131
+ isfile(t1),
132
+ isfile(t1c),
133
+ isfile(t2),
134
+ isfile(flair),
135
+ ]), "%s" % patient_name
136
+
137
+ shutil.copy(t1, join(target_imagesVal, patient_name + "_0000.nii.gz"))
138
+ shutil.copy(t1c, join(target_imagesVal, patient_name + "_0001.nii.gz"))
139
+ shutil.copy(t2, join(target_imagesVal, patient_name + "_0002.nii.gz"))
140
+ shutil.copy(flair, join(target_imagesVal, patient_name + "_0003.nii.gz"))
141
+
142
+ """
143
+ #I dont have the testing data
144
+ downloaded_data_dir = "/home/fabian/Downloads/BraTS2018_train_val_test_data/MICCAI_BraTS_2018_Data_Testing_FIsensee"
145
+
146
+ for p in subdirs(downloaded_data_dir, join=False):
147
+ patdir = join(downloaded_data_dir, p)
148
+ patient_name = p
149
+ t1 = join(patdir, p + "_t1.nii.gz")
150
+ t1c = join(patdir, p + "_t1ce.nii.gz")
151
+ t2 = join(patdir, p + "_t2.nii.gz")
152
+ flair = join(patdir, p + "_flair.nii.gz")
153
+
154
+ assert all([
155
+ isfile(t1),
156
+ isfile(t1c),
157
+ isfile(t2),
158
+ isfile(flair),
159
+ ]), "%s" % patient_name
160
+
161
+ shutil.copy(t1, join(target_imagesTs, patient_name + "_0000.nii.gz"))
162
+ shutil.copy(t1c, join(target_imagesTs, patient_name + "_0001.nii.gz"))
163
+ shutil.copy(t2, join(target_imagesTs, patient_name + "_0002.nii.gz"))
164
+ shutil.copy(flair, join(target_imagesTs, patient_name + "_0003.nii.gz"))"""
nnunet/dataset_conversion/Task055_SegTHOR.py ADDED
@@ -0,0 +1,98 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ from nnunet.paths import nnUNet_raw_data
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+ import SimpleITK as sitk
21
+
22
+
23
+ def convert_for_submission(source_dir, target_dir):
24
+ """
25
+ I believe they want .nii, not .nii.gz
26
+ :param source_dir:
27
+ :param target_dir:
28
+ :return:
29
+ """
30
+ files = subfiles(source_dir, suffix=".nii.gz", join=False)
31
+ maybe_mkdir_p(target_dir)
32
+ for f in files:
33
+ img = sitk.ReadImage(join(source_dir, f))
34
+ out_file = join(target_dir, f[:-7] + ".nii")
35
+ sitk.WriteImage(img, out_file)
36
+
37
+
38
+
39
+ if __name__ == "__main__":
40
+ base = "/media/fabian/DeepLearningData/SegTHOR"
41
+
42
+ task_id = 55
43
+ task_name = "SegTHOR"
44
+
45
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
46
+
47
+ out_base = join(nnUNet_raw_data, foldername)
48
+ imagestr = join(out_base, "imagesTr")
49
+ imagests = join(out_base, "imagesTs")
50
+ labelstr = join(out_base, "labelsTr")
51
+ maybe_mkdir_p(imagestr)
52
+ maybe_mkdir_p(imagests)
53
+ maybe_mkdir_p(labelstr)
54
+
55
+ train_patient_names = []
56
+ test_patient_names = []
57
+ train_patients = subfolders(join(base, "train"), join=False)
58
+ for p in train_patients:
59
+ curr = join(base, "train", p)
60
+ label_file = join(curr, "GT.nii.gz")
61
+ image_file = join(curr, p + ".nii.gz")
62
+ shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
63
+ shutil.copy(label_file, join(labelstr, p + ".nii.gz"))
64
+ train_patient_names.append(p)
65
+
66
+ test_patients = subfiles(join(base, "test"), join=False, suffix=".nii.gz")
67
+ for p in test_patients:
68
+ p = p[:-7]
69
+ curr = join(base, "test")
70
+ image_file = join(curr, p + ".nii.gz")
71
+ shutil.copy(image_file, join(imagests, p + "_0000.nii.gz"))
72
+ test_patient_names.append(p)
73
+
74
+
75
+ json_dict = OrderedDict()
76
+ json_dict['name'] = "SegTHOR"
77
+ json_dict['description'] = "SegTHOR"
78
+ json_dict['tensorImageSize'] = "4D"
79
+ json_dict['reference'] = "see challenge website"
80
+ json_dict['licence'] = "see challenge website"
81
+ json_dict['release'] = "0.0"
82
+ json_dict['modality'] = {
83
+ "0": "CT",
84
+ }
85
+ json_dict['labels'] = {
86
+ "0": "background",
87
+ "1": "esophagus",
88
+ "2": "heart",
89
+ "3": "trachea",
90
+ "4": "aorta",
91
+ }
92
+ json_dict['numTraining'] = len(train_patient_names)
93
+ json_dict['numTest'] = len(test_patient_names)
94
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
95
+ train_patient_names]
96
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
97
+
98
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task056_VerSe2019.py ADDED
@@ -0,0 +1,274 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ import SimpleITK as sitk
18
+ from multiprocessing.pool import Pool
19
+ from nnunet.configuration import default_num_threads
20
+ from nnunet.paths import nnUNet_raw_data
21
+ from batchgenerators.utilities.file_and_folder_operations import *
22
+ import shutil
23
+ from medpy import metric
24
+ import numpy as np
25
+ from nnunet.utilities.image_reorientation import reorient_all_images_in_folder_to_ras
26
+
27
+
28
+ def check_if_all_in_good_orientation(imagesTr_folder: str, labelsTr_folder: str, output_folder: str) -> None:
29
+ maybe_mkdir_p(output_folder)
30
+ filenames = subfiles(labelsTr_folder, suffix='.nii.gz', join=False)
31
+ import matplotlib.pyplot as plt
32
+ for n in filenames:
33
+ img = sitk.GetArrayFromImage(sitk.ReadImage(join(imagesTr_folder, n[:-7] + '_0000.nii.gz')))
34
+ lab = sitk.GetArrayFromImage(sitk.ReadImage(join(labelsTr_folder, n)))
35
+ assert np.all([i == j for i, j in zip(img.shape, lab.shape)])
36
+ z_slice = img.shape[0] // 2
37
+ img_slice = img[z_slice]
38
+ lab_slice = lab[z_slice]
39
+ lab_slice[lab_slice != 0] = 1
40
+ img_slice = img_slice - img_slice.min()
41
+ img_slice = img_slice / img_slice.max()
42
+ stacked = np.vstack((img_slice, lab_slice))
43
+ print(stacked.shape)
44
+ plt.imsave(join(output_folder, n[:-7] + '.png'), stacked, cmap='gray')
45
+
46
+
47
+ def evaluate_verse_case(sitk_file_ref:str, sitk_file_test:str):
48
+ """
49
+ Only vertebrae that are present in the reference will be evaluated
50
+ :param sitk_file_ref:
51
+ :param sitk_file_test:
52
+ :return:
53
+ """
54
+ gt_npy = sitk.GetArrayFromImage(sitk.ReadImage(sitk_file_ref))
55
+ pred_npy = sitk.GetArrayFromImage(sitk.ReadImage(sitk_file_test))
56
+ dice_scores = []
57
+ for label in range(1, 26):
58
+ mask_gt = gt_npy == label
59
+ if np.sum(mask_gt) > 0:
60
+ mask_pred = pred_npy == label
61
+ dc = metric.dc(mask_pred, mask_gt)
62
+ else:
63
+ dc = np.nan
64
+ dice_scores.append(dc)
65
+ return dice_scores
66
+
67
+
68
+ def evaluate_verse_folder(folder_pred, folder_gt, out_json="/home/fabian/verse.json"):
69
+ p = Pool(default_num_threads)
70
+ files_gt_bare = subfiles(folder_gt, join=False)
71
+ assert all([isfile(join(folder_pred, i)) for i in files_gt_bare]), "some files are missing in the predicted folder"
72
+ files_pred = [join(folder_pred, i) for i in files_gt_bare]
73
+ files_gt = [join(folder_gt, i) for i in files_gt_bare]
74
+
75
+ results = p.starmap_async(evaluate_verse_case, zip(files_gt, files_pred))
76
+
77
+ results = results.get()
78
+
79
+ dct = {i: j for i, j in zip(files_gt_bare, results)}
80
+
81
+ results_stacked = np.vstack(results)
82
+ results_mean = np.nanmean(results_stacked, 0)
83
+ overall_mean = np.nanmean(results_mean)
84
+
85
+ save_json((dct, list(results_mean), overall_mean), out_json)
86
+ p.close()
87
+ p.join()
88
+
89
+
90
+ def print_unique_labels_and_their_volumes(image: str, print_only_if_vol_smaller_than: float = None):
91
+ img = sitk.ReadImage(image)
92
+ voxel_volume = np.prod(img.GetSpacing())
93
+ img_npy = sitk.GetArrayFromImage(img)
94
+ uniques = [i for i in np.unique(img_npy) if i != 0]
95
+ volumes = {i: np.sum(img_npy == i) * voxel_volume for i in uniques}
96
+ print('')
97
+ print(image.split('/')[-1])
98
+ print('uniques:', uniques)
99
+ for k in volumes.keys():
100
+ v = volumes[k]
101
+ if print_only_if_vol_smaller_than is not None and v > print_only_if_vol_smaller_than:
102
+ pass
103
+ else:
104
+ print('k:', k, '\tvol:', volumes[k])
105
+
106
+
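+ # Worked example for the volume computation above (not part of the original file): with a voxel
+ # spacing of (0.8, 0.8, 3.0) mm, voxel_volume = np.prod((0.8, 0.8, 3.0)) = 1.92 mm^3, so a label
+ # covering 5 voxels has a volume of 9.6 mm^3 -- small enough to be flagged as spurious by the
+ # 1000 mm^3 threshold used in the main block below.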
107
+ def remove_label(label_file: str, remove_this: int, replace_with: int = 0):
108
+ img = sitk.ReadImage(label_file)
109
+ img_npy = sitk.GetArrayFromImage(img)
110
+ img_npy[img_npy == remove_this] = replace_with
111
+ img2 = sitk.GetImageFromArray(img_npy)
112
+ img2.CopyInformation(img)
113
+ sitk.WriteImage(img2, label_file)
114
+
115
+
116
+ if __name__ == "__main__":
117
+ ### First we create a nnunet dataset from verse. After this the images will be all willy nilly in their
118
+ # orientation because that's how VerSe comes
119
+ # base = '/media/fabian/DeepLearningData/VerSe2019'  # unused, overridden by the next line
120
+ base = "/home/fabian/data/VerSe2019"
121
+
122
+ # correct orientation
123
+ train_files_base = subfiles(join(base, "train"), join=False, suffix="_seg.nii.gz")
124
+ train_segs = [i[:-len("_seg.nii.gz")] + "_seg.nii.gz" for i in train_files_base]
125
+ train_data = [i[:-len("_seg.nii.gz")] + ".nii.gz" for i in train_files_base]
126
+ test_files_base = [i[:-len(".nii.gz")] for i in subfiles(join(base, "test"), join=False, suffix=".nii.gz")]
127
+ test_data = [i + ".nii.gz" for i in test_files_base]
128
+
129
+ task_id = 56
130
+ task_name = "VerSe"
131
+
132
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
133
+
134
+ out_base = join(nnUNet_raw_data, foldername)
135
+ imagestr = join(out_base, "imagesTr")
136
+ imagests = join(out_base, "imagesTs")
137
+ labelstr = join(out_base, "labelsTr")
138
+ maybe_mkdir_p(imagestr)
139
+ maybe_mkdir_p(imagests)
140
+ maybe_mkdir_p(labelstr)
141
+
142
+ train_patient_names = [i[:-len("_seg.nii.gz")] for i in subfiles(join(base, "train"), join=False, suffix="_seg.nii.gz")]
143
+ for p in train_patient_names:
144
+ curr = join(base, "train")
145
+ label_file = join(curr, p + "_seg.nii.gz")
146
+ image_file = join(curr, p + ".nii.gz")
147
+ shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
148
+ shutil.copy(label_file, join(labelstr, p + ".nii.gz"))
149
+
150
+ test_patient_names = [i[:-7] for i in subfiles(join(base, "test"), join=False, suffix=".nii.gz")]
151
+ for p in test_patient_names:
152
+ curr = join(base, "test")
153
+ image_file = join(curr, p + ".nii.gz")
154
+ shutil.copy(image_file, join(imagests, p + "_0000.nii.gz"))
155
+
156
+
157
+ json_dict = OrderedDict()
158
+ json_dict['name'] = "VerSe2019"
159
+ json_dict['description'] = "VerSe2019"
160
+ json_dict['tensorImageSize'] = "4D"
161
+ json_dict['reference'] = "see challenge website"
162
+ json_dict['licence'] = "see challenge website"
163
+ json_dict['release'] = "0.0"
164
+ json_dict['modality'] = {
165
+ "0": "CT",
166
+ }
167
+ json_dict['labels'] = {i: str(i) for i in range(26)}
168
+
169
+ json_dict['numTraining'] = len(train_patient_names)
170
+ json_dict['numTest'] = len(test_patient_names)
171
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
172
+ train_patient_names]
173
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
174
+
175
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
176
+
177
+ # now we reorient all those images to ras. This saves a pkl with the original affine. We need this information to
178
+ # bring our predictions into the same geometry for submission
179
+ reorient_all_images_in_folder_to_ras(imagestr)
180
+ reorient_all_images_in_folder_to_ras(imagests)
181
+ reorient_all_images_in_folder_to_ras(labelstr)
182
+
183
+ # sanity check
184
+ check_if_all_in_good_orientation(imagestr, labelstr, join(out_base, 'sanitycheck'))
185
+ # looks good to me - proceed
186
+
187
+ # check the volumes of the vertebrae
188
+ _ = [print_unique_labels_and_their_volumes(i, 1000) for i in subfiles(labelstr, suffix='.nii.gz')]
189
+
190
+ # some cases appear fishy. For example, verse063.nii.gz has labels [1, 20, 21, 22, 23, 24] and 1 only has a volume
191
+ # of 63mm^3
192
+
193
+ #let's correct those
194
+
195
+ # 19 is connected to the image border and should not be segmented. Only one slice of 19 is segmented in the
196
+ # reference. Looks wrong
197
+ remove_label(join(labelstr, 'verse031.nii.gz'), 19, 0)
198
+
199
+ # spurious annotation of 18 (vol: 8.00)
200
+ remove_label(join(labelstr, 'verse060.nii.gz'), 18, 0)
201
+
202
+ # spurious annotation of 16 (vol: 3.00)
203
+ remove_label(join(labelstr, 'verse061.nii.gz'), 16, 0)
204
+
205
+ # spurious annotation of 1 (vol: 63.00) although the rest of the vertebra is [20, 21, 22, 23, 24]
206
+ remove_label(join(labelstr, 'verse063.nii.gz'), 1, 0)
207
+
208
+ # spurious annotation of 3 (vol: 9.53) although the rest of the vertebra is
209
+ # [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
210
+ remove_label(join(labelstr, 'verse074.nii.gz'), 3, 0)
211
+
212
+ # spurious annotation of 3 (vol: 15.00)
213
+ remove_label(join(labelstr, 'verse097.nii.gz'), 3, 0)
214
+
215
+ # spurious annotation of 3 (vol: 10) although the rest of the vertebra is
216
+ # [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
217
+ remove_label(join(labelstr, 'verse151.nii.gz'), 3, 0)
218
+
219
+ # spurious annotation of 25 (vol: 4) although the rest of the vertebra is
220
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
221
+ remove_label(join(labelstr, 'verse201.nii.gz'), 25, 0)
222
+
223
+ # spurious annotation of 23 (vol: 8) although the rest of the vertebra is
224
+ # [1, 2, 3, 4, 5, 6, 7, 8]
225
+ remove_label(join(labelstr, 'verse207.nii.gz'), 23, 0)
226
+
227
+ # spurious annotation of 23 (vol: 12) although the rest of the vertebra is
228
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
229
+ remove_label(join(labelstr, 'verse208.nii.gz'), 23, 0)
230
+
231
+ # spurious annotation of 23 (vol: 2) although the rest of the vertebra is
232
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
233
+ remove_label(join(labelstr, 'verse212.nii.gz'), 23, 0)
234
+
235
+ # spurious annotation of 20 (vol: 4) although the rest of the vertebra is
236
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
237
+ remove_label(join(labelstr, 'verse214.nii.gz'), 20, 0)
238
+
239
+ # spurious annotation of 23 (vol: 15) although the rest of the vertebra is
240
+ # [1, 2, 3, 4, 5, 6, 7, 8]
241
+ remove_label(join(labelstr, 'verse223.nii.gz'), 23, 0)
242
+
243
+ # spurious annotation of 23 (vol: 1) and 25 (vol: 7) although the rest of the vertebra is
244
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9]
245
+ remove_label(join(labelstr, 'verse226.nii.gz'), 23, 0)
246
+ remove_label(join(labelstr, 'verse226.nii.gz'), 25, 0)
247
+
248
+ # spurious annotation of 25 (vol: 27) although the rest of the vertebra is
249
+ # [1, 2, 3, 4, 5, 6, 7, 8]
250
+ remove_label(join(labelstr, 'verse227.nii.gz'), 25, 0)
251
+
252
+ # spurious annotation of 20 (vol: 24) although the rest of the vertebra is
253
+ # [1, 2, 3, 4, 5, 6, 7, 8]
254
+ remove_label(join(labelstr, 'verse232.nii.gz'), 20, 0)
255
+
256
+
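+     # for reference, remove_label (presumably defined further up in this script, outside this excerpt)
+     # conceptually does the following (sketch assuming a SimpleITK round trip):
+     #
+     #   def remove_label(label_file, label, replace_with):
+     #       itk = sitk.ReadImage(label_file)
+     #       npy = sitk.GetArrayFromImage(itk)
+     #       npy[npy == label] = replace_with
+     #       out = sitk.GetImageFromArray(npy)
+     #       out.CopyInformation(itk)
+     #       sitk.WriteImage(out, label_file)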
257
+ # Now we are ready to run nnU-Net
258
+
259
+
260
+ """# run this part of the code once training is done
261
+ folder_gt = "/media/fabian/My Book/MedicalDecathlon/nnUNet_raw_splitted/Task056_VerSe/labelsTr"
262
+
263
+ folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_fullres/Task056_VerSe/nnUNetTrainerV2__nnUNetPlansv2.1/cv_niftis_raw"
264
+ out_json = "/home/fabian/Task056_VerSe_3d_fullres_summary.json"
265
+ evaluate_verse_folder(folder_pred, folder_gt, out_json)
266
+
267
+ folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_lowres/Task056_VerSe/nnUNetTrainerV2__nnUNetPlansv2.1/cv_niftis_raw"
268
+ out_json = "/home/fabian/Task056_VerSe_3d_lowres_summary.json"
269
+ evaluate_verse_folder(folder_pred, folder_gt, out_json)
270
+
271
+ folder_pred = "/home/fabian/drives/datasets/results/nnUNet/3d_cascade_fullres/Task056_VerSe/nnUNetTrainerV2CascadeFullRes__nnUNetPlansv2.1/cv_niftis_raw"
272
+ out_json = "/home/fabian/Task056_VerSe_3d_cascade_fullres_summary.json"
273
+ evaluate_verse_folder(folder_pred, folder_gt, out_json)"""
274
+
nnunet/dataset_conversion/Task056_Verse_normalize_orientation.py ADDED
@@ -0,0 +1,98 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ """
17
+ This code is copied from https://gist.github.com/nlessmann/24d405eaa82abba6676deb6be839266c. All credits go to the
18
+ original author (user nlessmann on GitHub)
19
+ """
20
+
21
+ import numpy as np
22
+ import SimpleITK as sitk
23
+
24
+
25
+ def reverse_axes(image):
26
+ return np.transpose(image, tuple(reversed(range(image.ndim))))
27
+
28
+
29
+ def read_image(imagefile):
30
+ image = sitk.ReadImage(imagefile)
31
+ data = reverse_axes(sitk.GetArrayFromImage(image)) # switch from zyx to xyz
32
+ header = {
33
+ 'spacing': image.GetSpacing(),
34
+ 'origin': image.GetOrigin(),
35
+ 'direction': image.GetDirection()
36
+ }
37
+ return data, header
38
+
39
+
40
+ def save_image(img: np.ndarray, header: dict, output_file: str):
41
+ """
42
+ CAREFUL you need to restore_original_slice_orientation before saving!
43
+ :param img:
44
+ :param header:
45
+ :return:
46
+ """
47
+     # reverse back
+     img = reverse_axes(img)  # switch back from xyz to zyx
49
+ img_itk = sitk.GetImageFromArray(img)
50
+ img_itk.SetSpacing(header['spacing'])
51
+ img_itk.SetOrigin(header['origin'])
52
+ if not isinstance(header['direction'], tuple):
53
+ img_itk.SetDirection(header['direction'].flatten())
54
+ else:
55
+ img_itk.SetDirection(header['direction'])
56
+
57
+ sitk.WriteImage(img_itk, output_file)
58
+
59
+
60
+ def swap_flip_dimensions(cosine_matrix, image, header=None):
61
+ # Compute swaps and flips
62
+ swap = np.argmax(abs(cosine_matrix), axis=0)
63
+ flip = np.sum(cosine_matrix, axis=0)
64
+
65
+ # Apply transformation to image volume
66
+ image = np.transpose(image, tuple(swap))
67
+ image = image[tuple(slice(None, None, int(f)) for f in flip)]
68
+
69
+ if header is None:
70
+ return image
71
+
72
+ # Apply transformation to header
73
+ header['spacing'] = tuple(header['spacing'][s] for s in swap)
74
+ header['direction'] = np.eye(3)
75
+
76
+ return image, header
77
+
78
+
79
+ def normalize_slice_orientation(image, header):
80
+ # Preserve original header so that we can easily transform back
81
+ header['original'] = header.copy()
82
+
83
+ # Compute inverse of cosine (round first because we assume 0/1 values only)
84
+ # to determine how the image has to be transposed and flipped for cosine = identity
85
+ cosine = np.asarray(header['direction']).reshape(3, 3)
86
+ cosine_inv = np.linalg.inv(np.round(cosine))
87
+
88
+ return swap_flip_dimensions(cosine_inv, image, header)
89
+
90
+
91
+ def restore_original_slice_orientation(mask, header):
92
+     # Use the original orientation for the transformation because we assume the mask to be in
+     # normalized orientation (i.e., identity cosine matrix)
94
+ cosine = np.asarray(header['original']['direction']).reshape(3, 3)
95
+ cosine_rnd = np.round(cosine)
96
+
97
+ # Apply transformations to both the image and the mask
98
+ return swap_flip_dimensions(cosine_rnd, mask), header['original']
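+
+
+ if __name__ == '__main__':
+     # hedged usage example of the round trip above; 'verse001.nii.gz' is a placeholder file name
+     data, header = read_image('verse001.nii.gz')
+     data, header = normalize_slice_orientation(data, header)
+     # ... run a model on `data` in normalized orientation to obtain `mask` ...
+     mask = data  # placeholder so this sketch is self-contained
+     mask, original_header = restore_original_slice_orientation(mask, header)
+     save_image(mask, original_header, 'verse001_restored.nii.gz')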
nnunet/dataset_conversion/Task058_ISBI_EM_SEG.py ADDED
@@ -0,0 +1,105 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+
18
+ import SimpleITK as sitk
19
+ import numpy as np
20
+ from batchgenerators.utilities.file_and_folder_operations import *
21
+ from nnunet.paths import nnUNet_raw_data
22
+ from skimage import io
23
+
24
+
25
+ def export_for_submission(predicted_npz, out_file):
26
+ """
27
+ they expect us to submit a 32 bit 3d tif image with values between 0 (100% membrane certainty) and 1
28
+ (100% non-membrane certainty). We use the softmax output for that
29
+ :return:
30
+ """
31
+ a = np.load(predicted_npz)['softmax']
32
+ a = a / a.sum(0)[None]
33
+ # channel 0 is non-membrane prob
34
+ nonmembr_prob = a[0]
35
+ assert out_file.endswith(".tif")
36
+ io.imsave(out_file, nonmembr_prob.astype(np.float32))
37
+
38
+
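+ # usage sketch (hypothetical paths): after predicting the test volume with softmax export enabled, convert the
+ # saved softmax to the submission tif:
+ #
+ #   export_for_submission('OUTPUT_FOLDER/testing.npz', '/tmp/isbi_submission.tif')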
39
+
40
+ if __name__ == "__main__":
41
+ # download from here http://brainiac2.mit.edu/isbi_challenge/downloads
42
+
43
+ base = "/media/fabian/My Book/datasets/ISBI_EM_SEG"
44
+ # the orientation of VerSe is all fing over the place. run fslreorient2std to correct that (hopefully!)
45
+ # THIS CAN HAVE CONSEQUENCES FOR THE TEST SET SUBMISSION! CAREFUL!
46
+ train_volume = io.imread(join(base, "train-volume.tif"))
47
+ train_labels = io.imread(join(base, "train-labels.tif"))
48
+ train_labels[train_labels == 255] = 1
49
+ test_volume = io.imread(join(base, "test-volume.tif"))
50
+
51
+ task_id = 58
52
+ task_name = "ISBI_EM_SEG"
53
+
54
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
55
+
56
+ out_base = join(nnUNet_raw_data, foldername)
57
+ imagestr = join(out_base, "imagesTr")
58
+ imagests = join(out_base, "imagesTs")
59
+ labelstr = join(out_base, "labelsTr")
60
+ maybe_mkdir_p(imagestr)
61
+ maybe_mkdir_p(imagests)
62
+ maybe_mkdir_p(labelstr)
63
+
64
+ img_tr_itk = sitk.GetImageFromArray(train_volume.astype(np.float32))
65
+ lab_tr_itk = sitk.GetImageFromArray(1 - train_labels) # walls are foreground, cells background
66
+ img_te_itk = sitk.GetImageFromArray(test_volume.astype(np.float32))
67
+
68
+ img_tr_itk.SetSpacing((4, 4, 50))
69
+ lab_tr_itk.SetSpacing((4, 4, 50))
70
+ img_te_itk.SetSpacing((4, 4, 50))
71
+
72
+ # 5 copies, otherwise we cannot run nnunet (5 fold cv needs that)
73
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training0_0000.nii.gz"))
74
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training1_0000.nii.gz"))
75
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training2_0000.nii.gz"))
76
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training3_0000.nii.gz"))
77
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training4_0000.nii.gz"))
78
+
79
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training0.nii.gz"))
80
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training1.nii.gz"))
81
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training2.nii.gz"))
82
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training3.nii.gz"))
83
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training4.nii.gz"))
84
+
85
+ sitk.WriteImage(img_te_itk, join(imagests, "testing.nii.gz"))
86
+
87
+ json_dict = OrderedDict()
88
+ json_dict['name'] = task_name
89
+ json_dict['description'] = task_name
90
+ json_dict['tensorImageSize'] = "4D"
91
+ json_dict['reference'] = "see challenge website"
92
+ json_dict['licence'] = "see challenge website"
93
+ json_dict['release'] = "0.0"
94
+ json_dict['modality'] = {
95
+ "0": "EM",
96
+ }
97
+ json_dict['labels'] = {i: str(i) for i in range(2)}
98
+
99
+ json_dict['numTraining'] = 5
100
+ json_dict['numTest'] = 1
101
+ json_dict['training'] = [{'image': "./imagesTr/training%d.nii.gz" % i, "label": "./labelsTr/training%d.nii.gz" % i} for i in
102
+ range(5)]
103
+ json_dict['test'] = ["./imagesTs/testing.nii.gz"]
104
+
105
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task059_EPFL_EM_MITO_SEG.py ADDED
@@ -0,0 +1,99 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import numpy as np
+ from collections import OrderedDict
+ from nnunet.paths import nnUNet_raw_data
+ from batchgenerators.utilities.file_and_folder_operations import *
+ import shutil
+ from skimage import io
+ import SimpleITK as sitk
25
+
26
+
27
+ if __name__ == "__main__":
28
+ # download from here https://www.epfl.ch/labs/cvlab/data/data-em/
29
+
30
+ base = "/media/fabian/My Book/datasets/EPFL_MITO_SEG"
31
+ # the orientation of VerSe is all fing over the place. run fslreorient2std to correct that (hopefully!)
32
+ # THIS CAN HAVE CONSEQUENCES FOR THE TEST SET SUBMISSION! CAREFUL!
33
+ train_volume = io.imread(join(base, "training.tif"))
34
+ train_labels = io.imread(join(base, "training_groundtruth.tif"))
35
+ train_labels[train_labels == 255] = 1
36
+ test_volume = io.imread(join(base, "testing.tif"))
37
+ test_labels = io.imread(join(base, "testing_groundtruth.tif"))
38
+ test_labels[test_labels == 255] = 1
39
+
40
+ task_id = 59
41
+ task_name = "EPFL_EM_MITO_SEG"
42
+
43
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
44
+
45
+ out_base = join(nnUNet_raw_data, foldername)
46
+ imagestr = join(out_base, "imagesTr")
47
+ imagests = join(out_base, "imagesTs")
48
+ labelstr = join(out_base, "labelsTr")
49
+ labelste = join(out_base, "labelsTs")
50
+ maybe_mkdir_p(imagestr)
51
+ maybe_mkdir_p(imagests)
52
+ maybe_mkdir_p(labelstr)
53
+ maybe_mkdir_p(labelste)
54
+
55
+ img_tr_itk = sitk.GetImageFromArray(train_volume.astype(np.float32))
56
+ lab_tr_itk = sitk.GetImageFromArray(train_labels.astype(np.uint8))
57
+ img_te_itk = sitk.GetImageFromArray(test_volume.astype(np.float32))
58
+ lab_te_itk = sitk.GetImageFromArray(test_labels.astype(np.uint8))
59
+
60
+ img_tr_itk.SetSpacing((5, 5, 5))
61
+ lab_tr_itk.SetSpacing((5, 5, 5))
62
+ img_te_itk.SetSpacing((5, 5, 5))
63
+ lab_te_itk.SetSpacing((5, 5, 5))
64
+
65
+ # 5 copies, otherwise we cannot run nnunet (5 fold cv needs that)
66
+ sitk.WriteImage(img_tr_itk, join(imagestr, "training0_0000.nii.gz"))
67
+ shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training1_0000.nii.gz"))
68
+ shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training2_0000.nii.gz"))
69
+ shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training3_0000.nii.gz"))
70
+ shutil.copy(join(imagestr, "training0_0000.nii.gz"), join(imagestr, "training4_0000.nii.gz"))
71
+
72
+ sitk.WriteImage(lab_tr_itk, join(labelstr, "training0.nii.gz"))
73
+ shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training1.nii.gz"))
74
+ shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training2.nii.gz"))
75
+ shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training3.nii.gz"))
76
+ shutil.copy(join(labelstr, "training0.nii.gz"), join(labelstr, "training4.nii.gz"))
77
+
78
+ sitk.WriteImage(img_te_itk, join(imagests, "testing.nii.gz"))
79
+ sitk.WriteImage(lab_te_itk, join(labelste, "testing.nii.gz"))
80
+
81
+ json_dict = OrderedDict()
82
+ json_dict['name'] = task_name
83
+ json_dict['description'] = task_name
84
+ json_dict['tensorImageSize'] = "4D"
85
+ json_dict['reference'] = "see challenge website"
86
+ json_dict['licence'] = "see challenge website"
87
+ json_dict['release'] = "0.0"
88
+ json_dict['modality'] = {
89
+ "0": "EM",
90
+ }
91
+ json_dict['labels'] = {i: str(i) for i in range(2)}
92
+
93
+ json_dict['numTraining'] = 5
94
+ json_dict['numTest'] = 1
95
+ json_dict['training'] = [{'image': "./imagesTr/training%d.nii.gz" % i, "label": "./labelsTr/training%d.nii.gz" % i} for i in
96
+ range(5)]
97
+ json_dict['test'] = ["./imagesTs/testing.nii.gz"]
98
+
99
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task061_CREMI.py ADDED
@@ -0,0 +1,146 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import numpy as np
20
+ from nnunet.paths import nnUNet_raw_data, preprocessing_output_dir
21
+ import shutil
22
+ import SimpleITK as sitk
23
+
24
+ try:
25
+ import h5py
26
+ except ImportError:
27
+ h5py = None
28
+
29
+
30
+ def load_sample(filename):
31
+ # we need raw data and seg
32
+ f = h5py.File(filename, 'r')
33
+ data = np.array(f['volumes']['raw'])
34
+
35
+ if 'labels' in f['volumes'].keys():
36
+ labels = np.array(f['volumes']['labels']['clefts'])
37
+ # clefts are low values, background is high
38
+ labels = (labels < 100000).astype(np.uint8)
39
+ else:
40
+ labels = None
41
+ return data, labels
42
+
43
+
44
+ def save_as_nifti(arr, filename, spacing):
45
+ itk_img = sitk.GetImageFromArray(arr)
46
+ itk_img.SetSpacing(spacing)
47
+ sitk.WriteImage(itk_img, filename)
48
+
49
+
50
+ def prepare_submission():
51
+ from cremi.io import CremiFile
52
+ from cremi.Volume import Volume
53
+
54
+ base = "/home/fabian/drives/datasets/results/nnUNet/test_sets/Task061_CREMI/"
55
+ # a+
56
+ pred = sitk.GetArrayFromImage(sitk.ReadImage(join(base, 'results_3d_fullres', "sample_a+.nii.gz"))).astype(np.uint64)
57
+ pred[pred == 0] = 0xffffffffffffffff
58
+ out_a = CremiFile(join(base, 'sample_A+_20160601.hdf'), 'w')
59
+ clefts = Volume(pred, (40., 4., 4.))
60
+ out_a.write_clefts(clefts)
61
+ out_a.close()
62
+
63
+ pred = sitk.GetArrayFromImage(sitk.ReadImage(join(base, 'results_3d_fullres', "sample_b+.nii.gz"))).astype(np.uint64)
64
+ pred[pred == 0] = 0xffffffffffffffff
65
+ out_b = CremiFile(join(base, 'sample_B+_20160601.hdf'), 'w')
66
+ clefts = Volume(pred, (40., 4., 4.))
67
+ out_b.write_clefts(clefts)
68
+ out_b.close()
69
+
70
+ pred = sitk.GetArrayFromImage(sitk.ReadImage(join(base, 'results_3d_fullres', "sample_c+.nii.gz"))).astype(np.uint64)
71
+ pred[pred == 0] = 0xffffffffffffffff
72
+ out_c = CremiFile(join(base, 'sample_C+_20160601.hdf'), 'w')
73
+ clefts = Volume(pred, (40., 4., 4.))
74
+ out_c.write_clefts(clefts)
75
+ out_c.close()
76
+
77
+
78
+ if __name__ == "__main__":
79
+ assert h5py is not None, "you need h5py for this. Install with 'pip install h5py'"
80
+
81
+ foldername = "Task061_CREMI"
82
+ out_base = join(nnUNet_raw_data, foldername)
83
+ imagestr = join(out_base, "imagesTr")
84
+ imagests = join(out_base, "imagesTs")
85
+ labelstr = join(out_base, "labelsTr")
86
+ maybe_mkdir_p(imagestr)
87
+ maybe_mkdir_p(imagests)
88
+ maybe_mkdir_p(labelstr)
89
+
90
+ base = "/media/fabian/My Book/datasets/CREMI"
91
+
92
+ # train
93
+ img, label = load_sample(join(base, "sample_A_20160501.hdf"))
94
+ save_as_nifti(img, join(imagestr, "sample_a_0000.nii.gz"), (4, 4, 40))
95
+ save_as_nifti(label, join(labelstr, "sample_a.nii.gz"), (4, 4, 40))
96
+ img, label = load_sample(join(base, "sample_B_20160501.hdf"))
97
+ save_as_nifti(img, join(imagestr, "sample_b_0000.nii.gz"), (4, 4, 40))
98
+ save_as_nifti(label, join(labelstr, "sample_b.nii.gz"), (4, 4, 40))
99
+ img, label = load_sample(join(base, "sample_C_20160501.hdf"))
100
+ save_as_nifti(img, join(imagestr, "sample_c_0000.nii.gz"), (4, 4, 40))
101
+ save_as_nifti(label, join(labelstr, "sample_c.nii.gz"), (4, 4, 40))
102
+
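+     # sample_c is duplicated as sample_d and sample_e below so that there are 5 training cases in total
+     # (nnU-Net's 5-fold cross-validation needs at least 5); the manual splits at the bottom of this file
+     # override the folds anyway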
103
+ save_as_nifti(img, join(imagestr, "sample_d_0000.nii.gz"), (4, 4, 40))
104
+ save_as_nifti(label, join(labelstr, "sample_d.nii.gz"), (4, 4, 40))
105
+
106
+ save_as_nifti(img, join(imagestr, "sample_e_0000.nii.gz"), (4, 4, 40))
107
+ save_as_nifti(label, join(labelstr, "sample_e.nii.gz"), (4, 4, 40))
108
+
109
+ # test
110
+ img, label = load_sample(join(base, "sample_A+_20160601.hdf"))
111
+ save_as_nifti(img, join(imagests, "sample_a+_0000.nii.gz"), (4, 4, 40))
112
+ img, label = load_sample(join(base, "sample_B+_20160601.hdf"))
113
+ save_as_nifti(img, join(imagests, "sample_b+_0000.nii.gz"), (4, 4, 40))
114
+ img, label = load_sample(join(base, "sample_C+_20160601.hdf"))
115
+ save_as_nifti(img, join(imagests, "sample_c+_0000.nii.gz"), (4, 4, 40))
116
+
117
+ json_dict = OrderedDict()
118
+ json_dict['name'] = foldername
119
+ json_dict['description'] = foldername
120
+ json_dict['tensorImageSize'] = "4D"
121
+ json_dict['reference'] = "see challenge website"
122
+ json_dict['licence'] = "see challenge website"
123
+ json_dict['release'] = "0.0"
124
+ json_dict['modality'] = {
125
+ "0": "EM",
126
+ }
127
+ json_dict['labels'] = {i: str(i) for i in range(2)}
128
+
129
+ json_dict['numTraining'] = 5
130
+     json_dict['numTest'] = 3
131
+ json_dict['training'] = [{'image': "./imagesTr/sample_%s.nii.gz" % i, "label": "./labelsTr/sample_%s.nii.gz" % i} for i in
132
+ ['a', 'b', 'c', 'd', 'e']]
133
+
134
+ json_dict['test'] = ["./imagesTs/sample_a+.nii.gz", "./imagesTs/sample_b+.nii.gz", "./imagesTs/sample_c+.nii.gz"]
135
+
136
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
137
+
138
+ out_preprocessed = join(preprocessing_output_dir, foldername)
139
+ maybe_mkdir_p(out_preprocessed)
140
+ # manual splits. we train 5 models on all three datasets
141
+ splits = [{'train': ["sample_a", "sample_b", "sample_c"], 'val': ["sample_a", "sample_b", "sample_c"]},
142
+ {'train': ["sample_a", "sample_b", "sample_c"], 'val': ["sample_a", "sample_b", "sample_c"]},
143
+ {'train': ["sample_a", "sample_b", "sample_c"], 'val': ["sample_a", "sample_b", "sample_c"]},
144
+ {'train': ["sample_a", "sample_b", "sample_c"], 'val': ["sample_a", "sample_b", "sample_c"]},
145
+ {'train': ["sample_a", "sample_b", "sample_c"], 'val': ["sample_a", "sample_b", "sample_c"]}]
146
+ save_pickle(splits, join(out_preprocessed, "splits_final.pkl"))
nnunet/dataset_conversion/Task062_NIHPancreas.py ADDED
@@ -0,0 +1,89 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from collections import OrderedDict
17
+ from nnunet.paths import nnUNet_raw_data
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ import shutil
20
+ from multiprocessing import Pool
21
+ import nibabel
22
+
23
+
24
+ def reorient(filename):
25
+ img = nibabel.load(filename)
26
+ img = nibabel.as_closest_canonical(img)
27
+ nibabel.save(img, filename)
28
+
29
+
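+ # nibabel's as_closest_canonical reorders/flips the voxel axes so that the affine is as close as possible to
+ # RAS+; e.g. nibabel.aff2axcodes(nibabel.as_closest_canonical(img).affine) yields ('R', 'A', 'S')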
30
+ if __name__ == "__main__":
31
+ base = "/media/fabian/DeepLearningData/Pancreas-CT"
32
+
33
+ # reorient
34
+ p = Pool(8)
35
+ results = []
36
+
37
+ for f in subfiles(join(base, "data"), suffix=".nii.gz"):
38
+ results.append(p.map_async(reorient, (f, )))
39
+ _ = [i.get() for i in results]
40
+
41
+ for f in subfiles(join(base, "TCIA_pancreas_labels-02-05-2017"), suffix=".nii.gz"):
42
+ results.append(p.map_async(reorient, (f, )))
43
+ _ = [i.get() for i in results]
44
+
45
+ task_id = 62
46
+ task_name = "NIHPancreas"
47
+
48
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
49
+
50
+ out_base = join(nnUNet_raw_data, foldername)
51
+ imagestr = join(out_base, "imagesTr")
52
+ imagests = join(out_base, "imagesTs")
53
+ labelstr = join(out_base, "labelsTr")
54
+ maybe_mkdir_p(imagestr)
55
+ maybe_mkdir_p(imagests)
56
+ maybe_mkdir_p(labelstr)
57
+
58
+ train_patient_names = []
59
+ test_patient_names = []
60
+ cases = list(range(1, 83))
61
+ folder_data = join(base, "data")
62
+ folder_labels = join(base, "TCIA_pancreas_labels-02-05-2017")
63
+ for c in cases:
64
+ casename = "pancreas_%04.0d" % c
65
+ shutil.copy(join(folder_data, "PANCREAS_%04.0d.nii.gz" % c), join(imagestr, casename + "_0000.nii.gz"))
66
+ shutil.copy(join(folder_labels, "label%04.0d.nii.gz" % c), join(labelstr, casename + ".nii.gz"))
67
+ train_patient_names.append(casename)
68
+
69
+ json_dict = OrderedDict()
70
+ json_dict['name'] = task_name
71
+ json_dict['description'] = task_name
72
+ json_dict['tensorImageSize'] = "4D"
73
+ json_dict['reference'] = "see website"
74
+ json_dict['licence'] = "see website"
75
+ json_dict['release'] = "0.0"
76
+ json_dict['modality'] = {
77
+ "0": "CT",
78
+ }
79
+ json_dict['labels'] = {
80
+ "0": "background",
81
+ "1": "Pancreas",
82
+ }
83
+ json_dict['numTraining'] = len(train_patient_names)
84
+ json_dict['numTest'] = len(test_patient_names)
85
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
86
+ train_patient_names]
87
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
88
+
89
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task064_KiTS_labelsFixed.py ADDED
@@ -0,0 +1,84 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import shutil
17
+ from batchgenerators.utilities.file_and_folder_operations import *
18
+ from nnunet.paths import nnUNet_raw_data
19
+
20
+
21
+ if __name__ == "__main__":
22
+ """
23
+ This is the KiTS dataset after Nick fixed all the labels that had errors. Downloaded on Jan 6th 2020
24
+ """
25
+
26
+ base = "/media/fabian/My Book/datasets/KiTS_clean/kits19/data"
27
+
28
+ task_id = 64
29
+ task_name = "KiTS_labelsFixed"
30
+
31
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
32
+
33
+ out_base = join(nnUNet_raw_data, foldername)
34
+ imagestr = join(out_base, "imagesTr")
35
+ imagests = join(out_base, "imagesTs")
36
+ labelstr = join(out_base, "labelsTr")
37
+ maybe_mkdir_p(imagestr)
38
+ maybe_mkdir_p(imagests)
39
+ maybe_mkdir_p(labelstr)
40
+
41
+ train_patient_names = []
42
+ test_patient_names = []
43
+ all_cases = subfolders(base, join=False)
44
+
45
+ train_patients = all_cases[:210]
46
+ test_patients = all_cases[210:]
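+     # KiTS19 ships 300 cases of which the first 210 (case_00000-case_00209) come with segmentations; the
+     # remaining 90 are the official test set, hence the split at index 210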
47
+
48
+ for p in train_patients:
49
+ curr = join(base, p)
50
+ label_file = join(curr, "segmentation.nii.gz")
51
+ image_file = join(curr, "imaging.nii.gz")
52
+ shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
53
+ shutil.copy(label_file, join(labelstr, p + ".nii.gz"))
54
+ train_patient_names.append(p)
55
+
56
+ for p in test_patients:
57
+ curr = join(base, p)
58
+ image_file = join(curr, "imaging.nii.gz")
59
+ shutil.copy(image_file, join(imagests, p + "_0000.nii.gz"))
60
+ test_patient_names.append(p)
61
+
62
+ json_dict = {}
63
+ json_dict['name'] = "KiTS"
64
+ json_dict['description'] = "kidney and kidney tumor segmentation"
65
+ json_dict['tensorImageSize'] = "4D"
66
+ json_dict['reference'] = "KiTS data for nnunet"
67
+ json_dict['licence'] = ""
68
+ json_dict['release'] = "0.0"
69
+ json_dict['modality'] = {
70
+ "0": "CT",
71
+ }
72
+ json_dict['labels'] = {
73
+ "0": "background",
74
+ "1": "Kidney",
75
+ "2": "Tumor"
76
+ }
77
+
78
+ json_dict['numTraining'] = len(train_patient_names)
79
+ json_dict['numTest'] = len(test_patient_names)
80
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
81
+ train_patient_names]
82
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
83
+
84
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task065_KiTS_NicksLabels.py ADDED
@@ -0,0 +1,87 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import shutil
17
+
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ from nnunet.paths import nnUNet_raw_data
20
+
21
+ if __name__ == "__main__":
22
+ """
23
+ Nick asked me to rerun the training with other labels (the Kidney region is defined differently).
24
+
25
+ These labels operate in interpolated spacing. I don't like that but that's how it is
26
+ """
27
+
28
+ base = "/media/fabian/My Book/datasets/KiTS_NicksLabels/kits19/data"
29
+ labelsdir = "/media/fabian/My Book/datasets/KiTS_NicksLabels/filled_labels"
30
+
31
+ task_id = 65
32
+ task_name = "KiTS_NicksLabels"
33
+
34
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
35
+
36
+ out_base = join(nnUNet_raw_data, foldername)
37
+ imagestr = join(out_base, "imagesTr")
38
+ imagests = join(out_base, "imagesTs")
39
+ labelstr = join(out_base, "labelsTr")
40
+ maybe_mkdir_p(imagestr)
41
+ maybe_mkdir_p(imagests)
42
+ maybe_mkdir_p(labelstr)
43
+
44
+ train_patient_names = []
45
+ test_patient_names = []
46
+ all_cases = subfolders(base, join=False)
47
+
48
+ train_patients = all_cases[:210]
49
+ test_patients = all_cases[210:]
50
+
51
+ for p in train_patients:
52
+ curr = join(base, p)
53
+ label_file = join(labelsdir, p + ".nii.gz")
54
+ image_file = join(curr, "imaging.nii.gz")
55
+ shutil.copy(image_file, join(imagestr, p + "_0000.nii.gz"))
56
+ shutil.copy(label_file, join(labelstr, p + ".nii.gz"))
57
+ train_patient_names.append(p)
58
+
59
+ for p in test_patients:
60
+ curr = join(base, p)
61
+ image_file = join(curr, "imaging.nii.gz")
62
+ shutil.copy(image_file, join(imagests, p + "_0000.nii.gz"))
63
+ test_patient_names.append(p)
64
+
65
+ json_dict = {}
66
+ json_dict['name'] = "KiTS"
67
+ json_dict['description'] = "kidney and kidney tumor segmentation"
68
+ json_dict['tensorImageSize'] = "4D"
69
+ json_dict['reference'] = "KiTS data for nnunet"
70
+ json_dict['licence'] = ""
71
+ json_dict['release'] = "0.0"
72
+ json_dict['modality'] = {
73
+ "0": "CT",
74
+ }
75
+ json_dict['labels'] = {
76
+ "0": "background",
77
+ "1": "Kidney",
78
+ "2": "Tumor"
79
+ }
80
+
81
+ json_dict['numTraining'] = len(train_patient_names)
82
+ json_dict['numTest'] = len(test_patient_names)
83
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
84
+ train_patient_names]
85
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
86
+
87
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task069_CovidSeg.py ADDED
@@ -0,0 +1,68 @@
1
+ import shutil
2
+
3
+ from batchgenerators.utilities.file_and_folder_operations import *
4
+ import SimpleITK as sitk
5
+ from nnunet.paths import nnUNet_raw_data
6
+
7
+ if __name__ == '__main__':
8
+ #data is available at http://medicalsegmentation.com/covid19/
9
+ download_dir = '/home/fabian/Downloads'
10
+
11
+ task_id = 69
12
+ task_name = "CovidSeg"
13
+
14
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
15
+
16
+ out_base = join(nnUNet_raw_data, foldername)
17
+ imagestr = join(out_base, "imagesTr")
18
+ imagests = join(out_base, "imagesTs")
19
+ labelstr = join(out_base, "labelsTr")
20
+ maybe_mkdir_p(imagestr)
21
+ maybe_mkdir_p(imagests)
22
+ maybe_mkdir_p(labelstr)
23
+
24
+ train_patient_names = []
25
+ test_patient_names = []
26
+
27
+ # the niftis are 3d, but they are just stacks of 2d slices from different patients. So no 3d U-Net, please
28
+
29
+ # the training stack has 100 slices, so we split it into 5 equally sized parts (20 slices each) for cross-validation
30
+ training_data = sitk.GetArrayFromImage(sitk.ReadImage(join(download_dir, 'tr_im.nii.gz')))
31
+ training_labels = sitk.GetArrayFromImage(sitk.ReadImage(join(download_dir, 'tr_mask.nii.gz')))
32
+
33
+ for f in range(5):
34
+ this_name = 'part_%d' % f
35
+ data = training_data[f::5]
36
+ labels = training_labels[f::5]
37
+ sitk.WriteImage(sitk.GetImageFromArray(data), join(imagestr, this_name + '_0000.nii.gz'))
38
+ sitk.WriteImage(sitk.GetImageFromArray(labels), join(labelstr, this_name + '.nii.gz'))
39
+ train_patient_names.append(this_name)
40
+
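+     # note: the stride-5 slicing above (training_data[f::5]) deals the 100 training slices out round-robin, so
+     # each of the 5 parts gets 20 slices (part_0 = slices 0, 5, 10, ..., 95 and so on)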
41
+ shutil.copy(join(download_dir, 'val_im.nii.gz'), join(imagests, 'val_im.nii.gz'))
42
+
43
+ test_patient_names.append('val_im')
44
+
45
+ json_dict = {}
46
+ json_dict['name'] = task_name
47
+ json_dict['description'] = ""
48
+ json_dict['tensorImageSize'] = "4D"
49
+ json_dict['reference'] = ""
50
+ json_dict['licence'] = ""
51
+ json_dict['release'] = "0.0"
52
+ json_dict['modality'] = {
53
+ "0": "nonct",
54
+ }
55
+ json_dict['labels'] = {
56
+ "0": "background",
57
+ "1": "stuff1",
58
+ "2": "stuff2",
59
+ "3": "stuff3",
60
+ }
61
+
62
+ json_dict['numTraining'] = len(train_patient_names)
63
+ json_dict['numTest'] = len(test_patient_names)
64
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
65
+ train_patient_names]
66
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_patient_names]
67
+
68
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
nnunet/dataset_conversion/Task075_Fluo_C3DH_A549_ManAndSim.py ADDED
@@ -0,0 +1,137 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from multiprocessing import Pool
16
+ import SimpleITK as sitk
17
+ import numpy as np
18
+ from batchgenerators.utilities.file_and_folder_operations import *
19
+ from nnunet.paths import nnUNet_raw_data
20
+ from nnunet.paths import preprocessing_output_dir
21
+ from skimage.io import imread
22
+
23
+
24
+ def load_tiff_convert_to_nifti(img_file, lab_file, img_out_base, anno_out, spacing):
25
+ img = imread(img_file)
26
+ img_itk = sitk.GetImageFromArray(img.astype(np.float32))
27
+ img_itk.SetSpacing(np.array(spacing)[::-1])
28
+ sitk.WriteImage(img_itk, join(img_out_base + "_0000.nii.gz"))
29
+
30
+ if lab_file is not None:
31
+ l = imread(lab_file)
32
+ l[l > 0] = 1
33
+ l_itk = sitk.GetImageFromArray(l.astype(np.uint8))
34
+ l_itk.SetSpacing(np.array(spacing)[::-1])
35
+ sitk.WriteImage(l_itk, anno_out)
36
+
37
+
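+ # note on the [::-1] above: the tif/numpy array is ordered (z, y, x) while SimpleITK expects spacing as
+ # (x, y, z), hence the spacing tuple is reversed before SetSpacing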
38
+ def prepare_task(base, task_id, task_name, spacing):
39
+ p = Pool(16)
40
+
41
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
42
+
43
+ out_base = join(nnUNet_raw_data, foldername)
44
+ imagestr = join(out_base, "imagesTr")
45
+ imagests = join(out_base, "imagesTs")
46
+ labelstr = join(out_base, "labelsTr")
47
+ maybe_mkdir_p(imagestr)
48
+ maybe_mkdir_p(imagests)
49
+ maybe_mkdir_p(labelstr)
50
+
51
+ train_patient_names = []
52
+ test_patient_names = []
53
+ res = []
54
+
55
+ for train_sequence in [i for i in subfolders(base + "_train", join=False) if not i.endswith("_GT")]:
56
+ train_cases = subfiles(join(base + '_train', train_sequence), suffix=".tif", join=False)
57
+ for t in train_cases:
58
+ casename = train_sequence + "_" + t[:-4]
59
+ img_file = join(base + '_train', train_sequence, t)
60
+ lab_file = join(base + '_train', train_sequence + "_GT", "SEG", "man_seg" + t[1:])
61
+ if not isfile(lab_file):
62
+ continue
63
+ img_out_base = join(imagestr, casename)
64
+ anno_out = join(labelstr, casename + ".nii.gz")
65
+ res.append(
66
+ p.starmap_async(load_tiff_convert_to_nifti, ((img_file, lab_file, img_out_base, anno_out, spacing),)))
67
+ train_patient_names.append(casename)
68
+
69
+ for test_sequence in [i for i in subfolders(base + "_test", join=False) if not i.endswith("_GT")]:
70
+ test_cases = subfiles(join(base + '_test', test_sequence), suffix=".tif", join=False)
71
+ for t in test_cases:
72
+ casename = test_sequence + "_" + t[:-4]
73
+ img_file = join(base + '_test', test_sequence, t)
74
+ lab_file = None
75
+ img_out_base = join(imagests, casename)
76
+ anno_out = None
77
+ res.append(
78
+ p.starmap_async(load_tiff_convert_to_nifti, ((img_file, lab_file, img_out_base, anno_out, spacing),)))
79
+ test_patient_names.append(casename)
80
+
81
+ _ = [i.get() for i in res]
82
+
83
+ json_dict = {}
84
+ json_dict['name'] = task_name
85
+ json_dict['description'] = ""
86
+ json_dict['tensorImageSize'] = "4D"
87
+ json_dict['reference'] = ""
88
+ json_dict['licence'] = ""
89
+ json_dict['release'] = "0.0"
90
+ json_dict['modality'] = {
91
+ "0": "BF",
92
+ }
93
+ json_dict['labels'] = {
94
+ "0": "background",
95
+ "1": "cell",
96
+ }
97
+
98
+ json_dict['numTraining'] = len(train_patient_names)
99
+ json_dict['numTest'] = len(test_patient_names)
100
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
101
+ train_patient_names]
102
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_patient_names]
103
+
104
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
105
+ p.close()
106
+ p.join()
107
+
108
+
109
+ if __name__ == "__main__":
110
+ base = "/media/fabian/My Book/datasets/CellTrackingChallenge/Fluo-C3DH-A549_ManAndSim"
111
+ task_id = 75
112
+ task_name = 'Fluo_C3DH_A549_ManAndSim'
113
+ spacing = (1, 0.126, 0.126)
114
+ prepare_task(base, task_id, task_name, spacing)
115
+
116
+ task_name = "Task075_Fluo_C3DH_A549_ManAndSim"
117
+ labelsTr = join(nnUNet_raw_data, task_name, "labelsTr")
118
+ cases = subfiles(labelsTr, suffix='.nii.gz', join=False)
119
+ splits = []
120
+ splits.append(
121
+ {'train': [i[:-7] for i in cases if i.startswith('01_') or i.startswith('02_SIM')],
122
+ 'val': [i[:-7] for i in cases if i.startswith('02_') and not i.startswith('02_SIM')]}
123
+ )
124
+ splits.append(
125
+ {'train': [i[:-7] for i in cases if i.startswith('02_') or i.startswith('01_SIM')],
126
+ 'val': [i[:-7] for i in cases if i.startswith('01_') and not i.startswith('01_SIM')]}
127
+ )
128
+ splits.append(
129
+ {'train': [i[:-7] for i in cases if i.startswith('01_') or i.startswith('02_') and not i.startswith('02_SIM')],
130
+ 'val': [i[:-7] for i in cases if i.startswith('02_SIM')]}
131
+ )
132
+ splits.append(
133
+ {'train': [i[:-7] for i in cases if i.startswith('02_') or i.startswith('01_') and not i.startswith('01_SIM')],
134
+ 'val': [i[:-7] for i in cases if i.startswith('01_SIM')]}
135
+ )
136
+ save_pickle(splits, join(preprocessing_output_dir, task_name, "splits_final.pkl"))
137
+
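+     # the four manual splits above implement a leave-one-sequence-out scheme: each of the four sequences
+     # (01, 02, 01_SIM, 02_SIM) serves as validation set exactly once while the other three are used for training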
nnunet/dataset_conversion/Task076_Fluo_N3DH_SIM.py ADDED
@@ -0,0 +1,312 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from multiprocessing.dummy import Pool  # thread-based Pool (the process-based import it shadowed was redundant)
18
+
19
+ import SimpleITK as sitk
20
+ import numpy as np
21
+ from batchgenerators.utilities.file_and_folder_operations import *
22
+ from skimage.io import imread
23
+ from skimage.io import imsave
24
+ from skimage.morphology import ball
25
+ from skimage.morphology import erosion
26
+ from skimage.transform import resize
27
+
28
+ from nnunet.paths import nnUNet_raw_data
29
+ from nnunet.paths import preprocessing_output_dir
30
+
31
+
32
+ def load_bmp_convert_to_nifti_borders(img_file, lab_file, img_out_base, anno_out, spacing, border_thickness=0.7):
33
+ img = imread(img_file)
34
+ img_itk = sitk.GetImageFromArray(img.astype(np.float32))
35
+ img_itk.SetSpacing(np.array(spacing)[::-1])
36
+ sitk.WriteImage(img_itk, join(img_out_base + "_0000.nii.gz"))
37
+
38
+ if lab_file is not None:
39
+ l = imread(lab_file)
40
+ borders = generate_border_as_suggested_by_twollmann(l, spacing, border_thickness)
41
+ l[l > 0] = 1
42
+ l[borders == 1] = 2
43
+ l_itk = sitk.GetImageFromArray(l.astype(np.uint8))
44
+ l_itk.SetSpacing(np.array(spacing)[::-1])
45
+ sitk.WriteImage(l_itk, anno_out)
46
+
47
+
48
+ def generate_ball(spacing, radius, dtype=int):
49
+ radius_in_voxels = np.round(radius / np.array(spacing)).astype(int)
50
+ n = 2 * radius_in_voxels + 1
51
+ ball_iso = ball(max(n) * 2, dtype=np.float64)
52
+ ball_resampled = resize(ball_iso, n, 1, 'constant', 0, clip=True, anti_aliasing=False, preserve_range=True)
53
+ ball_resampled[ball_resampled > 0.5] = 1
54
+ ball_resampled[ball_resampled <= 0.5] = 0
55
+ return ball_resampled.astype(dtype)
56
+
57
+
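+ # worked example (using the spacing set further below): generate_ball((0.2, 0.125, 0.125), 0.5) rounds the
+ # 0.5 mm radius to (2, 4, 4) voxels and returns a binary ellipsoid of shape (5, 9, 9), i.e. spherical in
+ # millimeters but anisotropic in voxels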
58
+ def generate_border_as_suggested_by_twollmann(label_img: np.ndarray, spacing, border_thickness: float = 2) -> np.ndarray:
59
+ border = np.zeros_like(label_img)
60
+ selem = generate_ball(spacing, border_thickness)
61
+ for l in np.unique(label_img):
62
+ if l == 0: continue
63
+ mask = (label_img == l).astype(int)
64
+ eroded = erosion(mask, selem)
65
+ border[(eroded == 0) & (mask != 0)] = 1
66
+ return border
67
+
68
+
69
+ def find_differences(labelstr1, labelstr2):
70
+ for n in subfiles(labelstr1, suffix='.nii.gz', join=False):
71
+ a = sitk.GetArrayFromImage(sitk.ReadImage(join(labelstr1, n)))
72
+ b = sitk.GetArrayFromImage(sitk.ReadImage(join(labelstr2, n)))
73
+ print(n, np.sum(a != b))
74
+
75
+
76
+ def prepare_task(base, task_id, task_name, spacing, border_thickness: float = 15, processes: int = 16):
77
+ p = Pool(processes)
78
+
79
+ foldername = "Task%03.0d_%s" % (task_id, task_name)
80
+
81
+ out_base = join(nnUNet_raw_data, foldername)
82
+ imagestr = join(out_base, "imagesTr")
83
+ imagests = join(out_base, "imagesTs")
84
+ labelstr = join(out_base, "labelsTr")
85
+ maybe_mkdir_p(imagestr)
86
+ maybe_mkdir_p(imagests)
87
+ maybe_mkdir_p(labelstr)
88
+
89
+ train_patient_names = []
90
+ test_patient_names = []
91
+ res = []
92
+
93
+ for train_sequence in [i for i in subfolders(base + "_train", join=False) if not i.endswith("_GT")]:
94
+ train_cases = subfiles(join(base + '_train', train_sequence), suffix=".tif", join=False)
95
+ for t in train_cases:
96
+ casename = train_sequence + "_" + t[:-4]
97
+ img_file = join(base + '_train', train_sequence, t)
98
+ lab_file = join(base + '_train', train_sequence + "_GT", "SEG", "man_seg" + t[1:])
99
+ if not isfile(lab_file):
100
+ continue
101
+ img_out_base = join(imagestr, casename)
102
+ anno_out = join(labelstr, casename + ".nii.gz")
103
+ res.append(
104
+ p.starmap_async(load_bmp_convert_to_nifti_borders, ((img_file, lab_file, img_out_base, anno_out, spacing, border_thickness),)))
105
+ train_patient_names.append(casename)
106
+
107
+ for test_sequence in [i for i in subfolders(base + "_test", join=False) if not i.endswith("_GT")]:
108
+ test_cases = subfiles(join(base + '_test', test_sequence), suffix=".tif", join=False)
109
+ for t in test_cases:
110
+ casename = test_sequence + "_" + t[:-4]
111
+ img_file = join(base + '_test', test_sequence, t)
112
+ lab_file = None
113
+ img_out_base = join(imagests, casename)
114
+ anno_out = None
115
+ res.append(
116
+ p.starmap_async(load_bmp_convert_to_nifti_borders, ((img_file, lab_file, img_out_base, anno_out, spacing, border_thickness),)))
117
+ test_patient_names.append(casename)
118
+
119
+ _ = [i.get() for i in res]
120
+
121
+ json_dict = {}
122
+ json_dict['name'] = task_name
123
+ json_dict['description'] = ""
124
+ json_dict['tensorImageSize'] = "4D"
125
+ json_dict['reference'] = ""
126
+ json_dict['licence'] = ""
127
+ json_dict['release'] = "0.0"
128
+ json_dict['modality'] = {
129
+ "0": "BF",
130
+ }
131
+ json_dict['labels'] = {
132
+ "0": "background",
133
+ "1": "cell",
134
+ "2": "border",
135
+ }
136
+
137
+ json_dict['numTraining'] = len(train_patient_names)
138
+ json_dict['numTest'] = len(test_patient_names)
139
+ json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
140
+ train_patient_names]
141
+ json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_patient_names]
142
+
143
+ save_json(json_dict, os.path.join(out_base, "dataset.json"))
144
+ p.close()
145
+ p.join()
146
+
147
+
148
+ def plot_images(folder, output_folder):
149
+ maybe_mkdir_p(output_folder)
150
+ import matplotlib.pyplot as plt
151
+ for i in subfiles(folder, suffix='.nii.gz', join=False):
152
+ img = sitk.GetArrayFromImage(sitk.ReadImage(join(folder, i)))
153
+ center_slice = img[img.shape[0]//2]
154
+ plt.imsave(join(output_folder, i[:-7] + '.png'), center_slice)
155
+
156
+
157
+ def convert_to_tiff(nifti_image: str, output_name: str):
158
+ npy = sitk.GetArrayFromImage(sitk.ReadImage(nifti_image))
159
+ imsave(output_name, npy.astype(np.uint16), compress=6)
160
+
161
+
162
+ def convert_to_instance_seg(arr: np.ndarray, spacing: tuple = (0.2, 0.125, 0.125)):
163
+ from skimage.morphology import label, dilation
164
+ # 1 is core, 2 is border
165
+ objects = label((arr == 1).astype(int))
166
+ final = np.copy(objects)
167
+ remaining_border = arr == 2
168
+ current = np.copy(objects)
169
+ dilated_mm = np.array((0, 0, 0))
170
+ spacing = np.array(spacing)
171
+
172
+ while np.sum(remaining_border) > 0:
173
+ strel_size = [0, 0, 0]
174
+ maximum_dilation = max(dilated_mm)
175
+ for i in range(3):
176
+ if spacing[i] == min(spacing):
177
+ strel_size[i] = 1
178
+ continue
179
+ if dilated_mm[i] + spacing[i] / 2 < maximum_dilation:
180
+ strel_size[i] = 1
181
+ ball_here = ball(1)
182
+
183
+ if strel_size[0] == 0: ball_here = ball_here[1:2]
184
+ if strel_size[1] == 0: ball_here = ball_here[:, 1:2]
185
+ if strel_size[2] == 0: ball_here = ball_here[:, :, 1:2]
186
+
187
+ #print(1)
188
+ dilated = dilation(current, ball_here)
189
+ diff = (current == 0) & (dilated != current)
190
+ final[diff & remaining_border] = dilated[diff & remaining_border]
191
+ remaining_border[diff] = 0
192
+ current = dilated
193
+ dilated_mm = [dilated_mm[i] + spacing[i] if strel_size[i] == 1 else dilated_mm[i] for i in range(3)]
194
+ return final.astype(np.uint32)
195
+
196
+
197
+ def convert_to_instance_seg2(arr: np.ndarray, spacing: tuple = (0.2, 0.125, 0.125), small_center_threshold=30,
198
+ isolated_border_as_separate_instance_threshold: int = 15):
199
+ from skimage.morphology import label, dilation
200
+ # we first identify centers that are too small and set them to be border. This should remove false positive instances
201
+ objects = label((arr == 1).astype(int))
202
+ for o in np.unique(objects):
203
+ if o > 0 and np.sum(objects == o) <= small_center_threshold:
204
+ arr[objects == o] = 2
205
+
206
+ # 1 is core, 2 is border
207
+ objects = label((arr == 1).astype(int))
208
+ final = np.copy(objects)
209
+ remaining_border = arr == 2
210
+ current = np.copy(objects)
211
+ dilated_mm = np.array((0, 0, 0))
212
+ spacing = np.array(spacing)
213
+
214
+ while np.sum(remaining_border) > 0:
215
+ strel_size = [0, 0, 0]
216
+ maximum_dilation = max(dilated_mm)
217
+ for i in range(3):
218
+ if spacing[i] == min(spacing):
219
+ strel_size[i] = 1
220
+ continue
221
+ if dilated_mm[i] + spacing[i] / 2 < maximum_dilation:
222
+ strel_size[i] = 1
223
+ ball_here = ball(1)
224
+
225
+ if strel_size[0] == 0: ball_here = ball_here[1:2]
226
+ if strel_size[1] == 0: ball_here = ball_here[:, 1:2]
227
+ if strel_size[2] == 0: ball_here = ball_here[:, :, 1:2]
228
+
229
+ #print(1)
230
+ dilated = dilation(current, ball_here)
231
+ diff = (current == 0) & (dilated != current)
232
+ final[diff & remaining_border] = dilated[diff & remaining_border]
233
+ remaining_border[diff] = 0
234
+ current = dilated
235
+ dilated_mm = [dilated_mm[i] + spacing[i] if strel_size[i] == 1 else dilated_mm[i] for i in range(3)]
236
+
237
+ # what can happen is that a cell is so small that the network only predicted border and no core. This cell will be
238
+ # fused with the nearest other instance, which we don't want. Therefore we identify isolated border predictions and
239
+ # give them a separate instance id
240
+ # we identify isolated border predictions by checking each foreground object in arr and see whether this object
241
+ # also contains label 1
242
+ max_label = np.max(final)
243
+
244
+ foreground_objects = label((arr != 0).astype(int))
245
+ for i in np.unique(foreground_objects):
246
+ if i > 0 and (1 not in np.unique(arr[foreground_objects==i])):
247
+ size_of_object = np.sum(foreground_objects==i)
248
+ if size_of_object >= isolated_border_as_separate_instance_threshold:
249
+ final[foreground_objects == i] = max_label + 1
250
+ max_label += 1
251
+ #print('yeah boi')
252
+
253
+ return final.astype(np.uint32)
254
+
255
+
256
+ def load_instanceseg_save(in_file: str, out_file:str, better: bool):
257
+ itk_img = sitk.ReadImage(in_file)
258
+ if not better:
259
+ instanceseg = convert_to_instance_seg(sitk.GetArrayFromImage(itk_img))
260
+ else:
261
+ instanceseg = convert_to_instance_seg2(sitk.GetArrayFromImage(itk_img))
262
+ itk_out = sitk.GetImageFromArray(instanceseg)
263
+ itk_out.CopyInformation(itk_img)
264
+ sitk.WriteImage(itk_out, out_file)
265
+
266
+
267
+ def convert_all_to_instance(input_folder: str, output_folder: str, processes: int = 24, better: bool = False):
268
+ maybe_mkdir_p(output_folder)
269
+ p = Pool(processes)
270
+ files = subfiles(input_folder, suffix='.nii.gz', join=False)
271
+ output_files = [join(output_folder, i) for i in files]
272
+ input_files = [join(input_folder, i) for i in files]
273
+ better = [better] * len(files)
274
+ r = p.starmap_async(load_instanceseg_save, zip(input_files, output_files, better))
275
+ _ = r.get()
276
+ p.close()
277
+ p.join()
278
+
279
+
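+ # usage sketch (hypothetical folders): turn exported core/border label maps into instance segmentations:
+ #
+ #   convert_all_to_instance('/path/to/predicted_niftis', '/path/to/instance_niftis', better=True)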
280
+ if __name__ == "__main__":
281
+ base = "/home/fabian/data/Fluo-N3DH-SIM"
282
+ task_id = 76
283
+ task_name = 'Fluo_N3DH_SIM'
284
+ spacing = (0.2, 0.125, 0.125)
285
+ border_thickness = 0.5
286
+
287
+ prepare_task(base, task_id, task_name, spacing, border_thickness, 12)
288
+
289
+ # we need custom splits
290
+ task_name = "Task076_Fluo_N3DH_SIM"
291
+ labelsTr = join(nnUNet_raw_data, task_name, "labelsTr")
292
+ cases = subfiles(labelsTr, suffix='.nii.gz', join=False)
293
+ splits = []
294
+ splits.append(
295
+ {'train': [i[:-7] for i in cases if i.startswith('01_')],
296
+ 'val': [i[:-7] for i in cases if i.startswith('02_')]}
297
+ )
298
+ splits.append(
299
+ {'train': [i[:-7] for i in cases if i.startswith('02_')],
300
+ 'val': [i[:-7] for i in cases if i.startswith('01_')]}
301
+ )
302
+
303
+ maybe_mkdir_p(join(preprocessing_output_dir, task_name))
304
+
305
+ save_pickle(splits, join(preprocessing_output_dir, task_name, "splits_final.pkl"))
306
+
307
+ # test set was converted to instance seg with convert_all_to_instance with better=True
308
+
309
+ # convert to tiff with convert_to_tiff
310
+
311
+
312
+
nnunet/dataset_conversion/Task082_BraTS_2020.py ADDED
@@ -0,0 +1,751 @@
1
+ # Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import shutil
+ from collections import OrderedDict
+ from copy import deepcopy
+ from multiprocessing.pool import Pool
+ from typing import Union
+
+ import SimpleITK as sitk
+ import numpy as np
+ import scipy.stats as ss
+ from batchgenerators.utilities.file_and_folder_operations import *
+ from medpy.metric import dc, hd95
+ from nnunet.dataset_conversion.Task032_BraTS_2018 import convert_labels_back_to_BraTS_2018_2019_convention
+ from nnunet.dataset_conversion.Task043_BraTS_2019 import copy_BraTS_segmentation_and_convert_labels
+ from nnunet.evaluation.region_based_evaluation import get_brats_regions, evaluate_regions
+ from nnunet.paths import nnUNet_raw_data
+ from nnunet.postprocessing.consolidate_postprocessing import collect_cv_niftis
+
+
+ def apply_brats_threshold(fname, out_dir, threshold, replace_with):
+     img_itk = sitk.ReadImage(fname)
+     img_npy = sitk.GetArrayFromImage(img_itk)
+     s = np.sum(img_npy == 3)
+     if s < threshold:
+         # print(s, fname)
+         img_npy[img_npy == 3] = replace_with
+     img_itk_postprocessed = sitk.GetImageFromArray(img_npy)
+     img_itk_postprocessed.CopyInformation(img_itk)
+     sitk.WriteImage(img_itk_postprocessed, join(out_dir, fname.split("/")[-1]))
+
+
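+ # toy illustration of the rule above (values are made up): a prediction with fewer
+ # than `threshold` enhancing-tumor voxels (label 3) gets that label relabeled:
+ # seg = np.array([0, 3, 3, 2]); threshold = 5; replace_with = 2
+ # if np.sum(seg == 3) < threshold:
+ #     seg[seg == 3] = replace_with   # -> [0, 2, 2, 2]
+
+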
+ def load_niftis_threshold_compute_dice(gt_file, pred_file, thresholds: Union[list, tuple]):
+     gt = sitk.GetArrayFromImage(sitk.ReadImage(gt_file))
+     pred = sitk.GetArrayFromImage(sitk.ReadImage(pred_file))
+     mask_pred = pred == 3
+     mask_gt = gt == 3
+     num_pred = np.sum(mask_pred)
+
+     num_gt = np.sum(mask_gt)
+     dice = dc(mask_pred, mask_gt)
+
+     res_dice = {}
+     res_was_smaller = {}
+
+     for t in thresholds:
+         was_smaller = False
+
+         if num_pred < t:
+             was_smaller = True
+             if num_gt == 0:
+                 dice_here = 1.
+             else:
+                 dice_here = 0.
+         else:
+             dice_here = deepcopy(dice)
+
+         res_dice[t] = dice_here
+         res_was_smaller[t] = was_smaller
+
+     return res_was_smaller, res_dice
+
+
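+ # shape of the return value for thresholds (0, 100), with illustrative numbers:
+ # res_was_smaller = {0: False, 100: True}; res_dice = {0: 0.71, 100: 0.0}
+
+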
+ def apply_threshold_to_folder(folder_in, folder_out, threshold, replace_with, processes=24):
+     maybe_mkdir_p(folder_out)
+     niftis = subfiles(folder_in, suffix='.nii.gz', join=True)
+
+     p = Pool(processes)
+     p.starmap(apply_brats_threshold, zip(niftis, [folder_out] * len(niftis), [threshold] * len(niftis), [replace_with] * len(niftis)))
+
+     p.close()
+     p.join()
+
+
+ def determine_brats_postprocessing(folder_with_preds, folder_with_gt, postprocessed_output_dir, processes=8,
+                                    thresholds=(0, 10, 50, 100, 200, 500, 750, 1000, 1500, 2500, 10000), replace_with=2):
+     # find pairs
+     nifti_gt = subfiles(folder_with_gt, suffix=".nii.gz", sort=True)
+
+     p = Pool(processes)
+
+     nifti_pred = subfiles(folder_with_preds, suffix='.nii.gz', sort=True)
+
+     results = p.starmap_async(load_niftis_threshold_compute_dice, zip(nifti_gt, nifti_pred, [thresholds] * len(nifti_pred)))
+     results = results.get()
+
+     all_dc_per_threshold = {}
+     for t in thresholds:
+         all_dc_per_threshold[t] = np.array([i[1][t] for i in results])
+         print(t, np.mean(all_dc_per_threshold[t]))
+
+     means = [np.mean(all_dc_per_threshold[t]) for t in thresholds]
+     best_threshold = thresholds[np.argmax(means)]
+     print('best', best_threshold, means[np.argmax(means)])
+
+     maybe_mkdir_p(postprocessed_output_dir)
+
+     p.starmap(apply_brats_threshold, zip(nifti_pred, [postprocessed_output_dir] * len(nifti_pred), [best_threshold] * len(nifti_pred), [replace_with] * len(nifti_pred)))
+
+     p.close()
+     p.join()
+
+     save_pickle((thresholds, means, best_threshold, all_dc_per_threshold), join(postprocessed_output_dir, "threshold.pkl"))
+
+
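+ # sketch of a typical call (paths are illustrative): picks the threshold with the best
+ # mean Dice for label 3, writes postprocessed niftis and threshold.pkl
+ # determine_brats_postprocessing('/path/to/cv_predictions', '/path/to/gt_niftis',
+ #                                '/path/to/cv_predictions_pp', processes=8)
+
+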
+ def collect_and_prepare(base_dir, num_processes=12, clean=False):
+     """
+     collect all cv_niftis, compute brats metrics, compute enh tumor thresholds and summarize in csv
+     :param base_dir:
+     :param num_processes:
+     :param clean: if True, recompute summaries/thresholds even if they already exist
+     :return:
+     """
+     out = join(base_dir, 'cv_results')
+     out_pp = join(base_dir, 'cv_results_pp')
+     experiments = subfolders(base_dir, join=False, prefix='nnUNetTrainer')
+     regions = get_brats_regions()
+     gt_dir = join(base_dir, 'gt_niftis')
+     replace_with = 2
+
+     failed = []
+     successful = []
+     for e in experiments:
+         print(e)
+         try:
+             o = join(out, e)
+             o_p = join(out_pp, e)
+             maybe_mkdir_p(o)
+             maybe_mkdir_p(o_p)
+             collect_cv_niftis(join(base_dir, e), o)
+             if clean or not isfile(join(o, 'summary.csv')):
+                 evaluate_regions(o, gt_dir, regions, num_processes)
+             if clean or not isfile(join(o_p, 'threshold.pkl')):
+                 determine_brats_postprocessing(o, gt_dir, o_p, num_processes, thresholds=list(np.arange(0, 760, 10)), replace_with=replace_with)
+             if clean or not isfile(join(o_p, 'summary.csv')):
+                 evaluate_regions(o_p, gt_dir, regions, num_processes)
+             successful.append(e)
+         except Exception as ex:
+             print("\nERROR\n", e, ex, "\n")
+             failed.append(e)
+
+     # we are interested in the mean (nan is 1) column
+     with open(join(base_dir, 'cv_summary.csv'), 'w') as f:
+         f.write('name,whole,core,enh,mean\n')
+         for e in successful:
+             expected_nopp = join(out, e, 'summary.csv')
+             expected_pp = join(out_pp, e, 'summary.csv')
+             if isfile(expected_nopp):
+                 res = np.loadtxt(expected_nopp, dtype=str, skiprows=0, delimiter=',')[-2]
+                 as_numeric = [float(i) for i in res[1:]]
+                 f.write(e + '_noPP,')
+                 f.write("%0.4f," % as_numeric[0])
+                 f.write("%0.4f," % as_numeric[1])
+                 f.write("%0.4f," % as_numeric[2])
+                 f.write("%0.4f\n" % np.mean(as_numeric))
+             if isfile(expected_pp):
+                 res = np.loadtxt(expected_pp, dtype=str, skiprows=0, delimiter=',')[-2]
+                 as_numeric = [float(i) for i in res[1:]]
+                 f.write(e + '_PP,')
+                 f.write("%0.4f," % as_numeric[0])
+                 f.write("%0.4f," % as_numeric[1])
+                 f.write("%0.4f," % as_numeric[2])
+                 f.write("%0.4f\n" % np.mean(as_numeric))
+
+     # this just crawls the folders and evaluates what it finds
+     with open(join(base_dir, 'cv_summary2.csv'), 'w') as f:
+         for folder in ['cv_results', 'cv_results_pp']:
+             for ex in subdirs(join(base_dir, folder), join=False):
+                 print(folder, ex)
+                 expected = join(base_dir, folder, ex, 'summary.csv')
+                 if clean or not isfile(expected):
+                     evaluate_regions(join(base_dir, folder, ex), gt_dir, regions, num_processes)
+                 if isfile(expected):
+                     res = np.loadtxt(expected, dtype=str, skiprows=0, delimiter=',')[-2]
+                     as_numeric = [float(i) for i in res[1:]]
+                     f.write('%s__%s,' % (folder, ex))
+                     f.write("%0.4f," % as_numeric[0])
+                     f.write("%0.4f," % as_numeric[1])
+                     f.write("%0.4f," % as_numeric[2])
+                     f.write("%0.4f\n" % np.mean(as_numeric))
+
+         f.write('name,whole,core,enh,mean\n')
+         for e in successful:
+             expected_nopp = join(out, e, 'summary.csv')
+             expected_pp = join(out_pp, e, 'summary.csv')
+             if isfile(expected_nopp):
+                 res = np.loadtxt(expected_nopp, dtype=str, skiprows=0, delimiter=',')[-2]
+                 as_numeric = [float(i) for i in res[1:]]
+                 f.write(e + '_noPP,')
+                 f.write("%0.4f," % as_numeric[0])
+                 f.write("%0.4f," % as_numeric[1])
+                 f.write("%0.4f," % as_numeric[2])
+                 f.write("%0.4f\n" % np.mean(as_numeric))
+             if isfile(expected_pp):
+                 res = np.loadtxt(expected_pp, dtype=str, skiprows=0, delimiter=',')[-2]
+                 as_numeric = [float(i) for i in res[1:]]
+                 f.write(e + '_PP,')
+                 f.write("%0.4f," % as_numeric[0])
+                 f.write("%0.4f," % as_numeric[1])
+                 f.write("%0.4f," % as_numeric[2])
+                 f.write("%0.4f\n" % np.mean(as_numeric))
+
+     # apply threshold to val set
+     expected_num_cases = 125
+     missing_valset = []
+     has_val_pred = []
+     for e in successful:
+         if isdir(join(base_dir, 'predVal', e)):
+             currdir = join(base_dir, 'predVal', e)
+             files = subfiles(currdir, suffix='.nii.gz', join=False)
+             if len(files) != expected_num_cases:
+                 print(e, 'prediction not done, found %d files, expected %d' % (len(files), expected_num_cases))
+                 continue
+             output_folder = join(base_dir, 'predVal_PP', e)
+             maybe_mkdir_p(output_folder)
+             threshold = load_pickle(join(out_pp, e, 'threshold.pkl'))[2]
+             if threshold > 1000: threshold = 750  # don't make it too big!
+             apply_threshold_to_folder(currdir, output_folder, threshold, replace_with, num_processes)
+             has_val_pred.append(e)
+         else:
+             print(e, 'has no valset predictions')
+             missing_valset.append(e)
+
+     # 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold' needs special treatment
+     e = 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5'
+     currdir = join(base_dir, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
+     output_folder = join(base_dir, 'predVal_PP', 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
+     maybe_mkdir_p(output_folder)
+     threshold = load_pickle(join(out_pp, e, 'threshold.pkl'))[2]
+     if threshold > 1000: threshold = 750  # don't make it too big!
+     apply_threshold_to_folder(currdir, output_folder, threshold, replace_with, num_processes)
+
+     # 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold' needs special treatment
+     e = 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5'
+     currdir = join(base_dir, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
+     output_folder = join(base_dir, 'predVal_PP', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
+     maybe_mkdir_p(output_folder)
+     threshold = load_pickle(join(out_pp, e, 'threshold.pkl'))[2]
+     if threshold > 1000: threshold = 750  # don't make it too big!
+     apply_threshold_to_folder(currdir, output_folder, threshold, replace_with, num_processes)
+
+     # convert val set to brats labels for submission
+     output_converted = join(base_dir, 'converted_valSet')
+
+     for source in ['predVal', 'predVal_PP']:
+         for e in has_val_pred + ['nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold']:
+             expected_source_folder = join(base_dir, source, e)
+             if not isdir(expected_source_folder):
+                 print(e, 'has no', source)
+                 raise RuntimeError()
+             files = subfiles(expected_source_folder, suffix='.nii.gz', join=False)
+             if len(files) != expected_num_cases:
+                 print(e, 'prediction not done, found %d files, expected %d' % (len(files), expected_num_cases))
+                 continue
+             target_folder = join(output_converted, source, e)
+             maybe_mkdir_p(target_folder)
+             convert_labels_back_to_BraTS_2018_2019_convention(expected_source_folder, target_folder)
+
+     summarize_validation_set_predictions(output_converted)
+
+
+ def summarize_validation_set_predictions(base):
+     with open(join(base, 'summary.csv'), 'w') as f:
+         f.write('name,whole,core,enh,mean,whole,core,enh,mean\n')
+         for subf in subfolders(base, join=False):
+             for e in subfolders(join(base, subf), join=False):
+                 expected = join(base, subf, e, 'Stats_Validation_final.csv')
+                 if not isfile(expected):
+                     print(subf, e, 'has missing csv')
+                     continue
+                 a = np.loadtxt(expected, delimiter=',', dtype=str)
+                 assert a.shape[0] == 131, 'did not evaluate all 125 cases!'
+                 selected_row = a[-5]
+                 values = [float(i) for i in selected_row[1:4]]
+                 f.write(e + "_" + subf + ',')
+                 f.write("%0.4f," % values[1])
+                 f.write("%0.4f," % values[2])
+                 f.write("%0.4f," % values[0])
+                 f.write("%0.4f," % np.mean(values))
+                 values = [float(i) for i in selected_row[-3:]]
+                 f.write("%0.4f," % values[1])
+                 f.write("%0.4f," % values[2])
+                 f.write("%0.4f," % values[0])
+                 f.write("%0.4f\n" % np.mean(values))
+
+
+ def compute_BraTS_dice(ref, pred):
+     """
+     ref and pred are binary integer numpy.ndarrays
+     :param ref:
+     :param pred:
+     :return:
+     """
+     num_ref = np.sum(ref)
+     num_pred = np.sum(pred)
+
+     if num_ref == 0:
+         if num_pred == 0:
+             return 1
+         else:
+             return 0
+     else:
+         return dc(pred, ref)
+
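+ # worked example (made-up arrays): ref = [1, 1, 0, 0], pred = [1, 0, 0, 0]
+ # overlap = 1, so Dice = 2 * 1 / (2 + 1) = 0.6667; all-empty pairs score 1, and an
+ # empty reference with a non-empty prediction scores 0
+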
+
+ def convert_all_to_BraTS(input_folder, output_folder, expected_num_cases=125):
+     for s in subdirs(input_folder, join=False):
+         nii = subfiles(join(input_folder, s), suffix='.nii.gz', join=False)
+         if len(nii) != expected_num_cases:
+             print(s)
+         else:
+             target_dir = join(output_folder, s)
+             convert_labels_back_to_BraTS_2018_2019_convention(join(input_folder, s), target_dir, num_processes=6)
+
+
+ def compute_BraTS_HD95(ref, pred):
+     """
+     ref and pred are binary integer numpy.ndarrays
+     spacing is assumed to be (1, 1, 1)
+     :param ref:
+     :param pred:
+     :return:
+     """
+     num_ref = np.sum(ref)
+     num_pred = np.sum(pred)
+
+     if num_ref == 0:
+         if num_pred == 0:
+             return 0
+         else:
+             return 373.12866
+     elif num_pred == 0 and num_ref != 0:
+         return 373.12866
+     else:
+         return hd95(pred, ref, (1, 1, 1))
+
+
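+ # 373.12866 is the worst-case penalty for a missing/spurious region; it corresponds to
+ # the diagonal of the standard 240x240x155 BraTS grid: sqrt(240**2 + 240**2 + 155**2)
+
+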
+ def evaluate_BraTS_case(arr: np.ndarray, arr_gt: np.ndarray):
+     """
+     attempting to reimplement the brats evaluation scheme
+     assumes edema=1, non_enh=2, enh=3
+     :param arr:
+     :param arr_gt:
+     :return:
+     """
+     # whole tumor
+     mask_gt = (arr_gt != 0).astype(int)
+     mask_pred = (arr != 0).astype(int)
+     dc_whole = compute_BraTS_dice(mask_gt, mask_pred)
+     hd95_whole = compute_BraTS_HD95(mask_gt, mask_pred)
+     del mask_gt, mask_pred
+
+     # tumor core
+     mask_gt = (arr_gt > 1).astype(int)
+     mask_pred = (arr > 1).astype(int)
+     dc_core = compute_BraTS_dice(mask_gt, mask_pred)
+     hd95_core = compute_BraTS_HD95(mask_gt, mask_pred)
+     del mask_gt, mask_pred
+
+     # enhancing
+     mask_gt = (arr_gt == 3).astype(int)
+     mask_pred = (arr == 3).astype(int)
+     dc_enh = compute_BraTS_dice(mask_gt, mask_pred)
+     hd95_enh = compute_BraTS_HD95(mask_gt, mask_pred)
+     del mask_gt, mask_pred
+
+     return dc_whole, dc_core, dc_enh, hd95_whole, hd95_core, hd95_enh
+
+
+ def load_evaluate(filename_gt: str, filename_pred: str):
+     arr_pred = sitk.GetArrayFromImage(sitk.ReadImage(filename_pred))
+     arr_gt = sitk.GetArrayFromImage(sitk.ReadImage(filename_gt))
+     return evaluate_BraTS_case(arr_pred, arr_gt)
+
+
+ def evaluate_BraTS_folder(folder_pred, folder_gt, num_processes: int = 24, strict=False):
+     nii_pred = subfiles(folder_pred, suffix='.nii.gz', join=False)
+     if len(nii_pred) == 0:
+         return
+     nii_gt = subfiles(folder_gt, suffix='.nii.gz', join=False)
+     assert all([i in nii_gt for i in nii_pred]), 'not all predicted niftis have a reference file!'
+     if strict:
+         assert all([i in nii_pred for i in nii_gt]), 'not all gt niftis have a predicted file!'
+     p = Pool(num_processes)
+     nii_pred_fullpath = [join(folder_pred, i) for i in nii_pred]
+     nii_gt_fullpath = [join(folder_gt, i) for i in nii_pred]
+     results = p.starmap(load_evaluate, zip(nii_gt_fullpath, nii_pred_fullpath))
+     # now write to output file
+     with open(join(folder_pred, 'results.csv'), 'w') as f:
+         f.write("name,dc_whole,dc_core,dc_enh,hd95_whole,hd95_core,hd95_enh\n")
+         for fname, r in zip(nii_pred, results):
+             f.write(fname)
+             f.write(",%0.4f,%0.4f,%0.4f,%3.3f,%3.3f,%3.3f\n" % r)
+
+
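+ # sketch (paths illustrative): writes results.csv into the prediction folder
+ # evaluate_BraTS_folder('/path/to/predictions', '/path/to/gt_niftis', num_processes=8)
+
+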
+ def load_csv_for_ranking(csv_file: str):
+     res = np.loadtxt(csv_file, dtype='str', delimiter=',')
+     scores = res[1:, [1, 2, 3, -3, -2, -1]].astype(float)
+     scores[:, -3:] *= -1
+     scores[:, -3:] += 373.129
+     assert np.all(scores <= 373.129)
+     assert np.all(scores >= 0)
+     return scores
+
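+ # note: the three HD95 columns are flipped to (373.129 - hd95) so that, like Dice,
+ # higher is better for all six metrics and one descending ranking pass suffices
+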
+
+ def rank_algorithms(data: np.ndarray):
+     """
+     data is (metrics x experiments x cases)
+     :param data:
+     :return:
+     """
+     num_metrics, num_experiments, num_cases = data.shape
+     ranks = np.zeros((num_metrics, num_experiments))
+     for m in range(num_metrics):
+         r = np.apply_along_axis(ss.rankdata, 0, -data[m], 'min')
+         ranks[m] = r.mean(1)
+     average_rank = np.mean(ranks, 0)
+     final_ranks = ss.rankdata(average_rank, 'min')
+     return final_ranks, average_rank, ranks
+
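+ # toy usage (random data, shapes only): 6 metrics, 3 experiments, 10 cases
+ # data = np.random.rand(6, 3, 10)
+ # final_ranks, average_rank, ranks = rank_algorithms(data)
+ # final_ranks has shape (3,); rank 1 is best
+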
+
+ def score_and_postprocess_model_based_on_rank_then_aggregate():
+     """
+     Similarly to BraTS 2017 - BraTS 2019, each participant will be ranked for each of the X test cases. Each case
+     includes 3 regions of evaluation, and the metrics used to produce the rankings will be the Dice Similarity
+     Coefficient and the 95% Hausdorff distance. Thus, for X number of cases included in the BraTS 2020, each
+     participant ends up having X*3*2 rankings. The final ranking score is the average of all these rankings normalized
+     by the number of teams.
+     https://zenodo.org/record/3718904
+
+     -> let's optimize for this.
+
+     Important: the outcome very much depends on the competing models. We need some references. We only got our own,
+     so let's hope this still works
+     :return:
+     """
+     base = "/media/fabian/Results/nnUNet/3d_fullres/Task082_BraTS2020"
+     replace_with = 2
+     num_processes = 24
+     expected_num_cases_val = 125
+
+     # use a separate output folder from the previous experiments to ensure we are not messing things up
+     output_base_here = join(base, 'use_brats_ranking')
+     maybe_mkdir_p(output_base_here)
+
+     # collect cv niftis and compute metrics with evaluate_BraTS_folder to ensure we work with the same metrics as brats
+     out = join(output_base_here, 'cv_results')
+     experiments = subfolders(base, join=False, prefix='nnUNetTrainer')
+     gt_dir = join(base, 'gt_niftis')
+
+     experiments_with_full_cv = []
+     for e in experiments:
+         print(e)
+         o = join(out, e)
+         maybe_mkdir_p(o)
+         try:
+             collect_cv_niftis(join(base, e), o)
+             if not isfile(join(o, 'results.csv')):
+                 evaluate_BraTS_folder(o, gt_dir, num_processes, strict=True)
+             experiments_with_full_cv.append(e)
+         except Exception as ex:
+             print("\nERROR\n", e, ex, "\n")
+             if isfile(join(o, 'results.csv')):
+                 os.remove(join(o, 'results.csv'))
+
+     # rank the non-postprocessed models
+     tmp = np.loadtxt(join(out, experiments_with_full_cv[0], 'results.csv'), dtype='str', delimiter=',')
+     num_cases = len(tmp) - 1
+     data_for_ranking = np.zeros((6, len(experiments_with_full_cv), num_cases))
+     for i, e in enumerate(experiments_with_full_cv):
+         scores = load_csv_for_ranking(join(out, e, 'results.csv'))
+         for metric in range(6):
+             data_for_ranking[metric, i] = scores[:, metric]
+
+     final_ranks, average_rank, ranks = rank_algorithms(data_for_ranking)
+
+     for t in np.argsort(final_ranks):
+         print(final_ranks[t], average_rank[t], experiments_with_full_cv[t])
+
+     # for each model, create output directories with different thresholds. evaluate ALL OF THEM (might take a while lol)
+     thresholds = np.arange(25, 751, 25)
+     output_pp_tmp = join(output_base_here, 'cv_determine_pp_thresholds')
+     for e in experiments_with_full_cv:
+         input_folder = join(out, e)
+         for t in thresholds:
+             output_directory = join(output_pp_tmp, e, str(t))
+             maybe_mkdir_p(output_directory)
+             if not isfile(join(output_directory, 'results.csv')):
+                 apply_threshold_to_folder(input_folder, output_directory, t, replace_with, processes=16)
+                 evaluate_BraTS_folder(output_directory, gt_dir, num_processes)
+
+     # load ALL the results!
+     results = []
+     experiment_names = []
+     for e in experiments_with_full_cv:
+         for t in thresholds:
+             output_directory = join(output_pp_tmp, e, str(t))
+             expected_file = join(output_directory, 'results.csv')
+             if not isfile(expected_file):
+                 print(e, 'does not have a results file for threshold', t)
+                 continue
+             results.append(load_csv_for_ranking(expected_file))
+             experiment_names.append("%s___%d" % (e, t))
+     all_results = np.concatenate([i[None] for i in results], 0).transpose((2, 0, 1))
+
+     # concatenate with non postprocessed models
+     all_results = np.concatenate((data_for_ranking, all_results), 1)
+     experiment_names += experiments_with_full_cv
+
+     final_ranks, average_rank, ranks = rank_algorithms(all_results)
+
+     for t in np.argsort(final_ranks):
+         print(final_ranks[t], average_rank[t], experiment_names[t])
+
+     # for each model, print the non postprocessed model as well as the best postprocessed model. If there are
+     # validation set predictions, apply the best threshold to the validation set
+     pred_val_base = join(base, 'predVal_PP_rank')
+     has_val_pred = []
+     for e in experiments_with_full_cv:
+         rank_nonpp = final_ranks[experiment_names.index(e)]
+         avg_rank_nonpp = average_rank[experiment_names.index(e)]
+         print(e, avg_rank_nonpp, rank_nonpp)
+         predicted_val = join(base, 'predVal', e)
+
+         pp_models = [j for j, i in enumerate(experiment_names) if i.split("___")[0] == e and i != e]
+         if len(pp_models) > 0:
+             ranks = [final_ranks[i] for i in pp_models]
+             best_idx = np.argmin(ranks)
+             best = experiment_names[pp_models[best_idx]]
+             best_avg_rank = average_rank[pp_models[best_idx]]
+             print(best, best_avg_rank, min(ranks))
+             print('')
+             # apply threshold to validation set
+             best_threshold = int(best.split('___')[-1])
+             if not isdir(predicted_val):
+                 print(e, 'has no valset predictions')
+             else:
+                 files = subfiles(predicted_val, suffix='.nii.gz')
+                 if len(files) != expected_num_cases_val:
+                     print(e, 'has missing val cases. found: %d expected: %d' % (len(files), expected_num_cases_val))
+                 else:
+                     apply_threshold_to_folder(predicted_val, join(pred_val_base, e), best_threshold, replace_with, num_processes)
+                     has_val_pred.append(e)
+         else:
+             print(e, 'not found in ranking')
+
+     # apply nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5 to nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold
+     e = 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5'
+     pp_models = [j for j, i in enumerate(experiment_names) if i.split("___")[0] == e and i != e]
+     ranks = [final_ranks[i] for i in pp_models]
+     best_idx = np.argmin(ranks)
+     best = experiment_names[pp_models[best_idx]]
+     best_avg_rank = average_rank[pp_models[best_idx]]
+     best_threshold = int(best.split('___')[-1])
+     predicted_val = join(base, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
+     apply_threshold_to_folder(predicted_val, join(pred_val_base, 'nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold'), best_threshold, replace_with, num_processes)
+     has_val_pred.append('nnUNetTrainerV2BraTSRegions_DA3_BN__nnUNetPlansv2.1_bs5_15fold')
+
+     # apply nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5 to nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold
+     e = 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5'
+     pp_models = [j for j, i in enumerate(experiment_names) if i.split("___")[0] == e and i != e]
+     ranks = [final_ranks[i] for i in pp_models]
+     best_idx = np.argmin(ranks)
+     best = experiment_names[pp_models[best_idx]]
+     best_avg_rank = average_rank[pp_models[best_idx]]
+     best_threshold = int(best.split('___')[-1])
+     predicted_val = join(base, 'predVal', 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
+     apply_threshold_to_folder(predicted_val, join(pred_val_base, 'nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold'), best_threshold, replace_with, num_processes)
+     has_val_pred.append('nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold')
+
+     # convert valsets
+     output_converted = join(base, 'converted_valSet')
+     for e in has_val_pred:
+         expected_source_folder = join(base, 'predVal_PP_rank', e)
+         if not isdir(expected_source_folder):
+             print(e, 'has no predVal_PP_rank')
+             raise RuntimeError()
+         files = subfiles(expected_source_folder, suffix='.nii.gz', join=False)
+         if len(files) != expected_num_cases_val:
+             print(e, 'prediction not done, found %d files, expected %d' % (len(files), expected_num_cases_val))
+             continue
+         target_folder = join(output_converted, 'predVal_PP_rank', e)
+         maybe_mkdir_p(target_folder)
+         convert_labels_back_to_BraTS_2018_2019_convention(expected_source_folder, target_folder)
+
+     # now load all the csvs for the validation set (obtained from evaluation platform) and rank our models on the
+     # validation set
+     flds = subdirs(output_converted, join=False)
+     results_valset = []
+     names_valset = []
+     for f in flds:
+         curr = join(output_converted, f)
+         experiments = subdirs(curr, join=False)
+         for e in experiments:
+             currr = join(curr, e)
+             expected_file = join(currr, 'Stats_Validation_final.csv')
+             if not isfile(expected_file):
+                 print(f, e, "has not been evaluated yet!")
+             else:
+                 res = load_csv_for_ranking(expected_file)[:-5]
+                 assert res.shape[0] == expected_num_cases_val
+                 results_valset.append(res[None])
+                 names_valset.append("%s___%s" % (f, e))
+     results_valset = np.concatenate(results_valset, 0)  # experiments x cases x metrics
+     # convert to metrics x experiments x cases
+     results_valset = results_valset.transpose((2, 0, 1))
+     final_ranks, average_rank, ranks = rank_algorithms(results_valset)
+     for t in np.argsort(final_ranks):
+         print(final_ranks[t], average_rank[t], names_valset[t])
+
+
+ if __name__ == "__main__":
+     """
+     THIS CODE IS A MESS. IT IS PROVIDED AS IS WITH NO GUARANTEES. YOU HAVE TO DIG THROUGH IT YOURSELF. GOOD LUCK ;-)
+
+     REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
+     """
+
+     task_name = "Task082_BraTS2020"
+     downloaded_data_dir = "/home/fabian/Downloads/MICCAI_BraTS2020_TrainingData"
+     downloaded_data_dir_val = "/home/fabian/Downloads/MICCAI_BraTS2020_ValidationData"
+
+     target_base = join(nnUNet_raw_data, task_name)
+     target_imagesTr = join(target_base, "imagesTr")
+     target_imagesVal = join(target_base, "imagesVal")
+     target_imagesTs = join(target_base, "imagesTs")
+     target_labelsTr = join(target_base, "labelsTr")
+
+     maybe_mkdir_p(target_imagesTr)
+     maybe_mkdir_p(target_imagesVal)
+     maybe_mkdir_p(target_imagesTs)
+     maybe_mkdir_p(target_labelsTr)
+
+     patient_names = []
+     cur = downloaded_data_dir
+     for p in subdirs(cur, join=False):
+         patdir = join(cur, p)
+         patient_name = p
+         patient_names.append(patient_name)
+         t1 = join(patdir, p + "_t1.nii.gz")
+         t1c = join(patdir, p + "_t1ce.nii.gz")
+         t2 = join(patdir, p + "_t2.nii.gz")
+         flair = join(patdir, p + "_flair.nii.gz")
+         seg = join(patdir, p + "_seg.nii.gz")
+
+         assert all([
+             isfile(t1),
+             isfile(t1c),
+             isfile(t2),
+             isfile(flair),
+             isfile(seg)
+         ]), "%s" % patient_name
+
+         shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
+         shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
+         shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
+         shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))
+
+         copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))
+
+     json_dict = OrderedDict()
+     json_dict['name'] = "BraTS2020"
+     json_dict['description'] = "nothing"
+     json_dict['tensorImageSize'] = "4D"
+     json_dict['reference'] = "see BraTS2020"
+     json_dict['licence'] = "see BraTS2020 license"
+     json_dict['release'] = "0.0"
+     json_dict['modality'] = {
+         "0": "T1",
+         "1": "T1ce",
+         "2": "T2",
+         "3": "FLAIR"
+     }
+     json_dict['labels'] = {
+         "0": "background",
+         "1": "edema",
+         "2": "non-enhancing",
+         "3": "enhancing",
+     }
+     json_dict['numTraining'] = len(patient_names)
+     json_dict['numTest'] = 0
+     json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
+                              patient_names]
+     json_dict['test'] = []
+
+     save_json(json_dict, join(target_base, "dataset.json"))
+
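+     # resulting dataset.json, schematically (entry shown is illustrative; numTraining is
+     # filled in from len(patient_names)):
+     # {"name": "BraTS2020",
+     #  "modality": {"0": "T1", "1": "T1ce", "2": "T2", "3": "FLAIR"},
+     #  "labels": {"0": "background", "1": "edema", "2": "non-enhancing", "3": "enhancing"},
+     #  "numTraining": ..., "training": [{"image": "./imagesTr/<case>.nii.gz",
+     #                                    "label": "./labelsTr/<case>.nii.gz"}, ...]}
+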
+     if downloaded_data_dir_val is not None:
+         for p in subdirs(downloaded_data_dir_val, join=False):
+             patdir = join(downloaded_data_dir_val, p)
+             patient_name = p
+             t1 = join(patdir, p + "_t1.nii.gz")
+             t1c = join(patdir, p + "_t1ce.nii.gz")
+             t2 = join(patdir, p + "_t2.nii.gz")
+             flair = join(patdir, p + "_flair.nii.gz")
+
+             assert all([
+                 isfile(t1),
+                 isfile(t1c),
+                 isfile(t2),
+                 isfile(flair),
+             ]), "%s" % patient_name
+
+             shutil.copy(t1, join(target_imagesVal, patient_name + "_0000.nii.gz"))
+             shutil.copy(t1c, join(target_imagesVal, patient_name + "_0001.nii.gz"))
+             shutil.copy(t2, join(target_imagesVal, patient_name + "_0002.nii.gz"))
+             shutil.copy(flair, join(target_imagesVal, patient_name + "_0003.nii.gz"))
+
+     downloaded_data_dir_test = "/home/fabian/Downloads/MICCAI_BraTS2020_TestingData"
+
+     if isdir(downloaded_data_dir_test):
+         for p in subdirs(downloaded_data_dir_test, join=False):
+             patdir = join(downloaded_data_dir_test, p)
+             patient_name = p
+             t1 = join(patdir, p + "_t1.nii.gz")
+             t1c = join(patdir, p + "_t1ce.nii.gz")
+             t2 = join(patdir, p + "_t2.nii.gz")
+             flair = join(patdir, p + "_flair.nii.gz")
+
+             assert all([
+                 isfile(t1),
+                 isfile(t1c),
+                 isfile(t2),
+                 isfile(flair),
+             ]), "%s" % patient_name
+
+             shutil.copy(t1, join(target_imagesTs, patient_name + "_0000.nii.gz"))
+             shutil.copy(t1c, join(target_imagesTs, patient_name + "_0001.nii.gz"))
+             shutil.copy(t2, join(target_imagesTs, patient_name + "_0002.nii.gz"))
+             shutil.copy(flair, join(target_imagesTs, patient_name + "_0003.nii.gz"))
+
+     # test set
+     # nnUNet_ensemble -f nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold -o ensembled_nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold
+     # apply_threshold_to_folder('ensembled_nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold__nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold/', 'ensemble_PP200/', 200, 2)
+     # convert_labels_back_to_BraTS_2018_2019_convention('ensemble_PP200/', 'ensemble_PP200_converted')
+
+     # export for publication of weights
+     # nnUNet_export_model_to_zip -tr nnUNetTrainerV2BraTSRegions_DA4_BN -pl nnUNetPlansv2.1_bs5 -f 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 -t 82 -o nnUNetTrainerV2BraTSRegions_DA4_BN__nnUNetPlansv2.1_bs5_15fold.zip --disable_strict
+     # nnUNet_export_model_to_zip -tr nnUNetTrainerV2BraTSRegions_DA3_BN_BD -pl nnUNetPlansv2.1_bs5 -f 0 1 2 3 4 -t 82 -o nnUNetTrainerV2BraTSRegions_DA3_BN_BD__nnUNetPlansv2.1_bs5_5fold.zip --disable_strict
+     # nnUNet_export_model_to_zip -tr nnUNetTrainerV2BraTSRegions_DA4_BN_BD -pl nnUNetPlansv2.1_bs5 -f 0 1 2 3 4 -t 82 -o nnUNetTrainerV2BraTSRegions_DA4_BN_BD__nnUNetPlansv2.1_bs5_5fold.zip --disable_strict