jgerbscheid commited on
Commit
b9bac12
·
1 Parent(s): bcb2589

initial commit

Browse files
Files changed (44) hide show
  1. README.md +4 -32
  2. app.py +45 -0
  3. dijkprofile-annotator/LICENSE +21 -0
  4. dijkprofile-annotator/conftest.py +10 -0
  5. dijkprofile-annotator/dijkprofile_annotator/__init__.py +13 -0
  6. dijkprofile-annotator/dijkprofile_annotator/__pycache__/__init__.cpython-39.pyc +0 -0
  7. dijkprofile-annotator/dijkprofile_annotator/__pycache__/annotator.cpython-39.pyc +0 -0
  8. dijkprofile-annotator/dijkprofile_annotator/__pycache__/config.cpython-39.pyc +0 -0
  9. dijkprofile-annotator/dijkprofile_annotator/annotator.py +98 -0
  10. dijkprofile-annotator/dijkprofile_annotator/config.py +169 -0
  11. dijkprofile-annotator/dijkprofile_annotator/data/trained_models/dijknet_simple_95.pt +3 -0
  12. dijkprofile-annotator/dijkprofile_annotator/data/trained_models/scaler.pik +0 -0
  13. dijkprofile-annotator/dijkprofile_annotator/dataset/__init__.py +1 -0
  14. dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/__init__.cpython-39.pyc +0 -0
  15. dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/dataset.cpython-39.pyc +0 -0
  16. dijkprofile-annotator/dijkprofile_annotator/dataset/dataset.py +42 -0
  17. dijkprofile-annotator/dijkprofile_annotator/models/__init__.py +1 -0
  18. dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/__init__.cpython-39.pyc +0 -0
  19. dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/dijknet.cpython-39.pyc +0 -0
  20. dijkprofile-annotator/dijkprofile_annotator/models/dijknet.py +109 -0
  21. dijkprofile-annotator/dijkprofile_annotator/preprocessing/__init__.py +8 -0
  22. dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/__init__.cpython-39.pyc +0 -0
  23. dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/link_collector.cpython-39.pyc +0 -0
  24. dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/preprocessing.cpython-39.pyc +0 -0
  25. dijkprofile-annotator/dijkprofile_annotator/preprocessing/preprocessing.py +355 -0
  26. dijkprofile-annotator/dijkprofile_annotator/requirements.txt +7 -0
  27. dijkprofile-annotator/dijkprofile_annotator/training/__init__.py +4 -0
  28. dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/__init__.cpython-39.pyc +0 -0
  29. dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/train.cpython-39.pyc +0 -0
  30. dijkprofile-annotator/dijkprofile_annotator/training/train.py +219 -0
  31. dijkprofile-annotator/dijkprofile_annotator/utils/__init__.py +9 -0
  32. dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  33. dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/utils.cpython-39.pyc +0 -0
  34. dijkprofile-annotator/dijkprofile_annotator/utils/utils.py +350 -0
  35. dijkprofile-annotator/dijkprofile_annotator/web/.gitkeep +0 -0
  36. dijkprofile-annotator/dijkprofile_annotator/web/app.py +45 -0
  37. dijkprofile-annotator/output/models/.gitkeep +0 -0
  38. dijkprofile-annotator/output/models/scaler.pik +0 -0
  39. dijkprofile-annotator/output/reports/.gitkeep +0 -0
  40. dijkprofile-annotator/output/visualizations/.gitkeep +0 -0
  41. dijkprofile-annotator/run/configs/.gitkeep +0 -0
  42. dijkprofile-annotator/setup.cfg +12 -0
  43. dijkprofile-annotator/setup.py +21 -0
  44. requirements.txt +8 -0
README.md CHANGED
@@ -1,37 +1,9 @@
1
  ---
2
- title: Dpa Example
3
- emoji: 🐨
4
- colorFrom: pink
5
- colorTo: yellow
6
  sdk: gradio
7
  app_file: app.py
8
  pinned: false
9
  ---
10
-
11
- # Configuration
12
-
13
- `title`: _string_
14
- Display title for the Space
15
-
16
- `emoji`: _string_
17
- Space emoji (emoji-only character allowed)
18
-
19
- `colorFrom`: _string_
20
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
21
-
22
- `colorTo`: _string_
23
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
24
-
25
- `sdk`: _string_
26
- Can be either `gradio` or `streamlit`
27
-
28
- `sdk_version` : _string_
29
- Only applicable for `streamlit` SDK.
30
- See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
31
-
32
- `app_file`: _string_
33
- Path to your main application file (which contains either `gradio` or `streamlit` Python code).
34
- Path is relative to the root of the repository.
35
-
36
- `pinned`: _boolean_
37
- Whether the Space stays on top of your list.
 
1
  ---
2
+ title: dijkprofile-annotator
3
+ emoji: 💦
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: gradio
7
  app_file: app.py
8
  pinned: false
9
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import math
3
+ import gradio as gr
4
+ import dijkprofile_annotator
5
+ from zipfile import ZipFile
6
+
7
def annotate_file(file_objects, model_type):
    """Annotate every uploaded surfacelines file and return the result path.

    Args:
        file_objects: iterable of uploaded file objects; only their ``name``
            attribute (the path of the file on disk) is read.
        model_type: model name chosen in the UI. It is only logged for now.

    Returns:
        str: path of the generated csv when exactly one file was processed,
        otherwise the path of a zip archive holding all generated csv files.
    """
    # Local imports: app.py does not import these modules at file level.
    import shutil
    import tempfile

    # TODO: actually use different model types based on selected model, only a well trained dijk model is available now.

    # shutil.get_terminal_size() falls back to a default size, whereas
    # os.get_terminal_size() raises OSError when stdout is not a terminal
    # (which is the normal situation for a hosted web app).
    terminal_width = shutil.get_terminal_size().columns

    # A fresh directory per call so that simultaneous requests cannot
    # overwrite each other's output files.
    output_dir = tempfile.mkdtemp(prefix="dijkprofile_annotator_")

    generated_charfiles = []
    str1 = "Starting processing of files."
    pad1 = max((terminal_width - len(str1)) // 2, 0) * "="
    print(pad1 + str1 + pad1)
    for i, file_obj in enumerate(file_objects):
        target_filepath = os.path.join(output_dir, f"characteristicpoints_{i}.csv")
        print(f" Processing file '{file_obj.name}', model '{model_type}', saving to '{target_filepath}'")
        dijkprofile_annotator.annotate(file_obj.name, target_filepath, device='cpu')
        generated_charfiles.append(target_filepath)
        print(f" finished processing: {file_obj.name}! saved to : {target_filepath}")
        print(" ", "-" * (terminal_width - 5))

    print("finished with all processing!")
    # return the csv file if only 1 file was given, return a zip otherwise.
    if len(generated_charfiles) == 1:
        print(f"returning file: {generated_charfiles[0]}")
        return generated_charfiles[0]

    return_zipfile = os.path.join(output_dir, "characterist_points.zip")
    with ZipFile(return_zipfile, 'w') as zipObj:
        for filepath in generated_charfiles:
            # store only the file name, not the temporary directory structure
            zipObj.write(filepath, arcname=os.path.basename(filepath))
    print(f"returning file: {return_zipfile}")
    return return_zipfile
33
+
34
# Dutch help text shown in the web UI; adjacent literals are joined by the parser.
description = (
    "Upload een surfacelines.csv bestand in QDAMEdit format en krijg een annotatie file in characteristicpoints format terug \n"
    "Een neural netwerk gebaseerd op image segmentation heeft geleerd op basis van ~6000 geannoteerde profielen om zo goed mogelijk automatisch de punten te plaatsen op de profielen.\n"
    "Er zijn meerdere modellen beschikbaar om de annotatie te genereren, het 'dijk' model probeert alleen de dijk te vinden, het 'dijk+sloot' model zoekt ook naar een sloot en het 'volledig' model "
    "probeert zo veel mogelijk van de punten beschikbaar in het QDAMEdit format te vinden. Probeer eerst het 'dijk' model aangezien hier de consistentste resultaten uit komen."
)

# Input and output components of the interface.
_surfacelines_input = gr.inputs.File(file_count="multiple", type="file", label="te annoteren surfacelines files", optional=False)
_model_type_input = gr.inputs.Dropdown(['dijk', 'dijk+sloot', "volledig"], type="value", default=None, label='Model type')
_charpoints_output = gr.outputs.File(label="gegenereerde file")

iface = gr.Interface(
    fn=annotate_file,
    title="Dijkprofiel Annotator",
    description=description,
    inputs=[_surfacelines_input, _model_type_input],
    outputs=_charpoints_output,
)
iface.launch()
dijkprofile-annotator/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2021 Het Waterschapshuis / Kenniscentrum / Tooling
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
dijkprofile-annotator/conftest.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import pytest
3
+
4
+
5
# This is used to get code coverage working correctly when running unit tests
def pytest_collection_modifyitems(items):
    """Add the ``no_cover`` marker to collected items located in an "integration" path.

    Args:
        items: the collected test items; each item whose ``fspath`` has a
            path component named "integration" receives the marker.
    """
    no_cover_marker = pytest.mark.no_cover
    for collected in items:
        if "integration" not in Path(collected.fspath).parts:
            continue
        collected.add_marker(no_cover_marker)
dijkprofile-annotator/dijkprofile_annotator/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from . import models
2
+ from . import dataset
3
+ from . import training
4
+ from . import utils
5
+ from . import config
6
+ from . import preprocessing
7
+ from .utils import visualize_sample
8
+ from .utils import visualize_prediction
9
+ from .utils import visualize_files
10
+ from .utils import visualize_dict
11
+ from .annotator import annotate
12
+ from .annotator import make_predictions
13
+ from .annotator import write_predictions_
dijkprofile-annotator/dijkprofile_annotator/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (637 Bytes). View file
 
dijkprofile-annotator/dijkprofile_annotator/__pycache__/annotator.cpython-39.pyc ADDED
Binary file (2.74 kB). View file
 
dijkprofile-annotator/dijkprofile_annotator/__pycache__/config.cpython-39.pyc ADDED
Binary file (4.18 kB). View file
 
dijkprofile-annotator/dijkprofile_annotator/annotator.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import os
3
+
4
+ import numpy as np
5
+ import torch
6
+
7
+ import dijkprofile_annotator.config as config
8
+ import dijkprofile_annotator.utils as utils
9
+ import dijkprofile_annotator.preprocessing as preprocessing
10
+ from dijkprofile_annotator.models import Dijknet
11
+
12
+
13
def annotate(surfacelines_filepath, outputfile, class_list='simple', max_profile_length=512, custom_model_path=None, custom_scaler_path=None, device=None):
    """Annotate a surfacelines file and write the predicted characteristic points to csv.

    Args:
        surfacelines_filepath (str): path of the surfacelines csv to annotate.
        outputfile (str): path of the characteristicpoints csv to write.
        class_list (str, optional): name of the class selection, passed to
            ``utils.get_class_dict``. Defaults to 'simple'.
        max_profile_length (int, optional): fixed length of the height
            profiles fed to the model. Defaults to 512.
        custom_model_path (str, optional): path to model weights to load
            instead of the bundled ``config.MODEL_PATH``.
        custom_scaler_path (str, optional): accepted for backward
            compatibility; not used by this function.
        device (optional): device to run on. When falsy, CUDA is used if it
            is available and the CPU otherwise.
    """
    surfacelines_dict = preprocessing.read_surfaceline_file(surfacelines_filepath)
    profile_dict = preprocessing.make_height_profiles(surfacelines_dict, max_profile_length)

    if not device:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # setup model
    class_dict, _, _ = utils.get_class_dict(class_list)
    model = Dijknet(1, len(class_dict))

    if custom_model_path:
        weights_path = custom_model_path
    else:
        # the bundled weights are stored relative to this package
        weights_path = os.path.join(os.path.dirname(__file__), config.MODEL_PATH)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # Inference mode for both bundled and custom weights, so the dropout
    # layers of the network are disabled while predicting.
    model.eval()

    # copy network to device
    model = model.to(device)

    predictions = make_predictions(model, profile_dict, max_profile_length, device)

    write_predictions_(predictions, profile_dict, surfacelines_dict, outputfile, class_list)
40
+
41
+
42
def make_predictions(model, profile_dict, max_profile_length, device):
    """Run the model on all profiles and return the predicted class per position.

    Args:
        model: callable network; it receives a float tensor of shape
            (n_profiles, 1, max_profile_length) and its output is reduced
            with an argmax over dimension 1.
        profile_dict (dict): maps a location to a dict with a 'profile' entry.
        max_profile_length (int): number of positions taken from each profile.
        device: device the input tensor is moved to before the forward pass.

    Returns:
        numpy.ndarray: predicted class indices, one row per profile in the
        iteration order of ``profile_dict``.
    """
    accumulator = np.zeros((len(profile_dict), max_profile_length))
    for i, entry in enumerate(profile_dict.values()):
        accumulator[i] = entry['profile'][:max_profile_length]

    # insert the channel axis expected by the 1D convolutions
    accumulator = accumulator.reshape(accumulator.shape[0], 1, max_profile_length)

    # Pure inference: without no_grad the forward pass records an autograd
    # graph that is never used and only costs memory.
    with torch.no_grad():
        outputs = model(torch.tensor(accumulator).to(device).float())
    flat_output = torch.argmax(outputs, dim=1).cpu()
    return flat_output.numpy()
53
+
54
+
55
def write_predictions_(predictions, profile_dict, surfacelines_dict, output_filepath, class_list):
    """Write the predicted characteristic points to a characteristicpoints csv.

    For every location the first position of each predicted class (other than
    class 0) is looked up in the surfaceline and stored as the X/Y/Z of that
    class. Columns without a prediction keep the value -1.

    Args:
        predictions: per-profile sequences of predicted class indices, in the
            iteration order of ``profile_dict``.
        profile_dict (dict): profiles by location; only its keys are used.
        surfacelines_dict (dict): list of (x, y, z) points by location.
        output_filepath (str): path of the csv file to write.
        class_list (str): class selection name, passed to ``utils.get_class_dict``.
    """
    _, inverse_class_dict, _ = utils.get_class_dict(class_list)

    # newline='' is what the csv module documents for files handed to a
    # writer; without it extra blank lines appear on platforms that
    # translate '\n' on write.
    with open(output_filepath, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile, delimiter=';', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(config.HEADER)
        for i, key in enumerate(profile_dict.keys()):
            # get predictions
            profile_pred = predictions[i]

            # construct dict with an entry for each column of the row
            row_dict = {column: -1 for column in config.HEADER}
            row_dict["LOCATIONID"] = key

            # NOTE(review): the nesting below assumes the coordinate lookup
            # belongs to the class-transition branch, i.e. only the first
            # position of a class is written - confirm against the original file.
            used_classes = []
            prev_class_n = 999  # key thats not in the inverse_class_dict
            for index, class_n in enumerate(profile_pred):
                if class_n == 0 or class_n in used_classes:
                    continue
                if class_n == prev_class_n:
                    # still inside the same class region, no new point
                    continue

                # get class name
                class_name = inverse_class_dict[class_n]

                # the class we just left may not be used again
                used_classes.append(prev_class_n)

                # set prev_class to the new class
                prev_class_n = class_n

                # predictions cover the padded profile; skip positions that
                # lie beyond the actual surfaceline
                if index >= len(surfacelines_dict[key]):
                    continue

                x, y, z = surfacelines_dict[key][index]
                row_dict["X_" + class_name] = round(x, 3)
                row_dict["Y_" + class_name] = round(y, 3)
                row_dict["Z_" + class_name] = round(z, 3)

            # write the row to the csv file
            writer.writerow([row_dict[columnname] for columnname in config.HEADER])
dijkprofile-annotator/dijkprofile_annotator/config.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ CHARPOINT_CONVERSION_DICT = {
4
+ "": "leeg",
5
+ "101_Q19_2": "buitenkruin",
6
+ "101_Q19_3": "binnenkruin",
7
+ "101_Q19_5": "binnenteen",
8
+ "105_T09_11": "insteek_sloot",
9
+ "811_T13_8": "leeg",
10
+ "351_T03_10": "leeg",
11
+ "_T01_KKW": "leeg",
12
+ "108_Q06_250": "leeg",
13
+ "303_Q05_1": "leeg",
14
+ "353__11": "leeg",
15
+ "_T00_17": "leeg",
16
+ "109_Q08_13": "leeg",
17
+ "_Q07_KDM": "leeg",
18
+ "_Q07_KDW": "leeg",
19
+ '0': "leeg",
20
+ None: "leeg",
21
+ 'nan': "leeg"
22
+ }
23
+
24
+ CLASS_DICT_REGIONAL = {
25
+ "leeg": 0,
26
+ "startpunt": 1,
27
+ "buitenkruin": 2,
28
+ "binnenkruin": 3,
29
+ "binnenteen": 4,
30
+ "insteek_sloot": 5
31
+ }
32
+
33
+ WEIGHT_DICT_REGIONAL = [0.1, 1.0, 1.1, 1.0, 0.1]
34
+
35
+ CLASS_DICT_FULL = {
36
+ 'leeg': 0,
37
+ 'Maaiveld binnenwaarts': 1,
38
+ 'Insteek sloot polderzijde': 2,
39
+ 'Slootbodem polderzijde': 3,
40
+ 'Slootbodem dijkzijde': 4,
41
+ 'Insteek sloot dijkzijde': 5,
42
+ 'Teen dijk binnenwaarts': 6,
43
+ 'Kruin binnenberm': 7,
44
+ 'Insteek binnenberm': 8,
45
+ 'Kruin binnentalud': 9,
46
+ 'Verkeersbelasting kant binnenwaarts': 9, # 10
47
+ 'Verkeersbelasting kant buitenwaarts': 10,
48
+ 'Kruin buitentalud': 10, # 12
49
+ 'Insteek buitenberm': 11,
50
+ 'Kruin buitenberm': 12,
51
+ 'Teen dijk buitenwaarts': 13,
52
+ 'Insteek geul': 14,
53
+ 'Teen geul': 15,
54
+ 'Maaiveld buitenwaarts': 16,
55
+ }
56
+
57
+ # TODO: write this out explicitly
58
+ WEIGHT_DICT_FULL = [1.0] * 17
59
+
60
+ CLASS_DICT_SIMPLE = {
61
+ 'leeg': 0,
62
+ 'Maaiveld buitenwaarts': 1,
63
+ 'Teen dijk buitenwaarts': 2,
64
+ 'Kruin buitentalud': 3,
65
+ 'Kruin binnentalud': 4,
66
+ 'Teen dijk binnenwaarts': 5,
67
+ }
68
+
69
+ WEIGHT_DICT_SIMPLE = [0.1, 0.5, 0.7, 1.0, 1.0, 0.5]
70
+
71
+ CLASS_DICT_SIMPLE_SLOOT = {
72
+ 'leeg': 0,
73
+ 'Maaiveld buitenwaarts': 1,
74
+ 'Teen dijk buitenwaarts': 2,
75
+ 'Kruin buitentalud': 3,
76
+ 'Kruin binnentalud': 4,
77
+ 'Teen dijk binnenwaarts': 5,
78
+ 'Insteek sloot dijkzijde': 6,
79
+ 'Insteek sloot polderzijde': 7,
80
+ 'Slootbodem polderzijde': 8,
81
+ 'Slootbodem dijkzijde': 9,
82
+ }
83
+
84
+ WEIGHT_DICT_SIMPLE_SLOOT = [0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.1]
85
+
86
+ CLASS_DICT_SIMPLE_BERM = {
87
+ 'leeg': 0,
88
+ 'Maaiveld buitenwaarts': 1,
89
+ 'Teen dijk buitenwaarts': 2,
90
+ 'Kruin buitentalud': 3,
91
+ 'Kruin binnentalud': 4,
92
+ 'Teen dijk binnenwaarts': 5,
93
+ 'Insteek sloot dijkzijde': 6,
94
+ 'Insteek sloot polderzijde': 7,
95
+ 'Slootbodem polderzijde': 8,
96
+ 'Slootbodem dijkzijde': 9,
97
+ 'Kruin binnenberm': 10,
98
+ 'Insteek binnenberm': 11,
99
+ }
100
+ WEIGHT_DICT_SIMPLE_BERM = [0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.1]
101
+
102
+ HEADER = ["LOCATIONID",
103
+ "X_Maaiveld binnenwaarts",
104
+ "Y_Maaiveld binnenwaarts",
105
+ "Z_Maaiveld binnenwaarts",
106
+ "X_Insteek sloot polderzijde",
107
+ "Y_Insteek sloot polderzijde",
108
+ "Z_Insteek sloot polderzijde",
109
+ "X_Slootbodem polderzijde",
110
+ "Y_Slootbodem polderzijde",
111
+ "Z_Slootbodem polderzijde",
112
+ "X_Slootbodem dijkzijde",
113
+ "Y_Slootbodem dijkzijde",
114
+ "Z_Slootbodem dijkzijde",
115
+ "X_Insteek sloot dijkzijde",
116
+ "Y_Insteek sloot dijkzijde",
117
+ "Z_Insteek sloot dijkzijde",
118
+ "X_Teen dijk binnenwaarts",
119
+ "Y_Teen dijk binnenwaarts",
120
+ "Z_Teen dijk binnenwaarts",
121
+ "X_Kruin binnenberm",
122
+ "Y_Kruin binnenberm",
123
+ "Z_Kruin binnenberm",
124
+ "X_Insteek binnenberm",
125
+ "Y_Insteek binnenberm",
126
+ "Z_Insteek binnenberm",
127
+ "X_Kruin binnentalud",
128
+ "Y_Kruin binnentalud",
129
+ "Z_Kruin binnentalud",
130
+ "X_Verkeersbelasting kant binnenwaarts",
131
+ "Y_Verkeersbelasting kant binnenwaarts",
132
+ "Z_Verkeersbelasting kant binnenwaarts",
133
+ "X_Verkeersbelasting kant buitenwaarts",
134
+ "Y_Verkeersbelasting kant buitenwaarts",
135
+ "Z_Verkeersbelasting kant buitenwaarts",
136
+ "X_Kruin buitentalud",
137
+ "Y_Kruin buitentalud",
138
+ "Z_Kruin buitentalud",
139
+ "X_Insteek buitenberm",
140
+ "Y_Insteek buitenberm",
141
+ "Z_Insteek buitenberm",
142
+ "X_Kruin buitenberm",
143
+ "Y_Kruin buitenberm",
144
+ "Z_Kruin buitenberm",
145
+ "X_Teen dijk buitenwaarts",
146
+ "Y_Teen dijk buitenwaarts",
147
+ "Z_Teen dijk buitenwaarts",
148
+ "X_Insteek geul",
149
+ "Y_Insteek geul",
150
+ "Z_Insteek geul",
151
+ "X_Teen geul",
152
+ "Y_Teen geul",
153
+ "Z_Teen geul",
154
+ "X_Maaiveld buitenwaarts",
155
+ "Y_Maaiveld buitenwaarts",
156
+ "Z_Maaiveld buitenwaarts"]
157
+
158
+ SCALER_PATH = os.path.join("data", "trained_models", "scaler.pik")
159
+ MODEL_PATH = os.path.join('data', 'trained_models', 'dijknet_simple_95.pt')
160
+
161
# Lookup tables from class index back to class name.
INVERSE_CLASS_DICT_FULL = {v: k for k, v in CLASS_DICT_FULL.items()}
INVERSE_CLASS_DICT_SIMPLE = {v: k for k, v in CLASS_DICT_SIMPLE.items()}
INVERSE_CLASS_DICT_SIMPLE_BERM = {v: k for k, v in CLASS_DICT_SIMPLE_BERM.items()}
INVERSE_CLASS_DICT_SIMPLE_SLOOT = {v: k for k, v in CLASS_DICT_SIMPLE_SLOOT.items()}
INVERSE_CLASS_DICT_REGIONAL = {v: k for k, v in CLASS_DICT_REGIONAL.items()}

# manual mappings to get the correct names for plotting later
# CLASS_DICT_FULL maps two names onto class 9 and two names onto class 10.
# A plain inversion keeps whichever name comes last in the dict, which for
# class 9 is 'Verkeersbelasting kant binnenwaarts'. Pin both shared indices
# to the 'Kruin ...' name so they are treated the same way.
# NOTE(review): assumes the 'Kruin ...' names are the intended ones, as the
# original mapping for class 10 suggests - confirm.
INVERSE_CLASS_DICT_FULL[9] = 'Kruin binnentalud'
INVERSE_CLASS_DICT_FULL[10] = 'Kruin buitentalud'
dijkprofile-annotator/dijkprofile_annotator/data/trained_models/dijknet_simple_95.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97f131655b81f0fb8f03fb7162f42a1b9baae1bd0fda064c761a0af35b4858e0
3
+ size 106467655
dijkprofile-annotator/dijkprofile_annotator/data/trained_models/scaler.pik ADDED
Binary file (420 Bytes). View file
 
dijkprofile-annotator/dijkprofile_annotator/dataset/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .dataset import DijkprofileDataset
dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (267 Bytes). View file
 
dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/dataset.cpython-39.pyc ADDED
Binary file (1.81 kB). View file
 
dijkprofile-annotator/dijkprofile_annotator/dataset/dataset.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch.utils.data as data
3
+
4
+
5
class DijkprofileDataset(data.Dataset):
    """Map-style pytorch dataset that serves height profiles with their labels."""

    def __init__(self, profile_dict, partition, custom_scaler_path=None):
        """Store the profiles and the ids that belong to this dataset.

        Args:
            profile_dict (dict): maps an id to a dict holding a 'profile' and
                a 'label' array.
            partition (list): ids that make up this dataset, e.g. one side of
                a train/test split.
            custom_scaler_path (optional): no longer used; scaling is not
                performed by this class.
        """
        self.data_dict = profile_dict
        self.list_IDs = partition

        print("scaler in dataset class is depracated and moved to preprocessing")

    def __len__(self):
        """Return the number of ids in this dataset."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Return the (profile, label) pair for the id at ``index``.

        Both arrays are reshaped to a single row; the profile is cast to float32.
        """
        sample = self.data_dict[self.list_IDs[index]]
        X = sample['profile'].reshape(1, -1).astype(np.float32)
        y = sample['label'].reshape(1, -1)
        return X, y

    def __str__(self):
        return f"<Dijkprofile dataset: datapoints={len(self.list_IDs)}>"
dijkprofile-annotator/dijkprofile_annotator/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .dijknet import Dijknet
dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (255 Bytes). View file
 
dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/dijknet.cpython-39.pyc ADDED
Binary file (3.86 kB). View file
 
dijkprofile-annotator/dijkprofile_annotator/models/dijknet.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from dijkprofile_annotator.utils import extract_img
4
+
5
+
6
class Double_conv(nn.Module):
    """Three (Conv1d => ReLU) stages with kernel sizes 3, 5 and 7, then dropout.

    The paddings are chosen so the sequence length is left unchanged.
    """

    def __init__(self, in_ch, out_ch, p):
        """
        Args:
            in_ch(int) : input channel
            out_ch(int) : output channel
            p(float) : dropout probability
        """
        super().__init__()
        stages = [
            nn.Conv1d(in_ch, out_ch, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(out_ch, out_ch, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv1d(out_ch, out_ch, kernel_size=7, stride=1, padding=3),
            nn.ReLU(inplace=True),
            nn.Dropout(p=p),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        return self.conv(x)
27
+
28
+
29
class Conv_down(nn.Module):
    """Encoder stage: a Double_conv block followed by a MaxPool1d that halves the length."""

    def __init__(self, in_ch, out_ch, p):
        """
        Args:
            in_ch(int) : input channel
            out_ch(int) : output channel
            p(float) : dropout probability handed to Double_conv
        """
        super().__init__()
        self.conv = Double_conv(in_ch, out_ch, p)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)

    def forward(self, x):
        """Return ``(pooled, features)``: the pooled output and the features before pooling."""
        features = self.conv(x)
        return self.pool(features), features
46
+
47
+
48
class Conv_up(nn.Module):
    """Decoder stage: transposed-conv upsampling, concat with the skip features, Double_conv."""

    def __init__(self, in_ch, out_ch, p):
        """
        Args:
            in_ch(int) : input channel
            out_ch(int) : output channel
            p(float) : dropout probability handed to Double_conv
        """
        super().__init__()
        self.up = nn.ConvTranspose1d(in_ch, out_ch, kernel_size=2, stride=2)
        # the convolution runs on the concatenation of the upsampled input and
        # the skip features, which is expected to have in_ch channels
        self.conv = Double_conv(in_ch, out_ch, p)

    def forward(self, x1, x2):
        """Upsample ``x1``, fit ``x2`` to its length with extract_img, and convolve both."""
        upsampled = self.up(x1)
        skip = extract_img(upsampled.size()[2], x2)
        return self.conv(torch.cat((upsampled, skip), dim=1))
68
+
69
+
70
class Dijknet(nn.Module):
    """Dijknet convolutional neural network. 1D Unet variant."""

    def __init__(self, in_channels, out_channels, p=0.25):
        """Build the encoder, decoder and output layers of the 1D Unet.

        The model is probably a bit too big for what it needs to do, but it
        seems to work just fine.

        Args:
            in_channels (int): number of input channels, should be 1
            out_channels (int): number of output channels/classes
            p (float, optional): dropout chance for the dropout layers. Defaults to 0.25.
        """
        super().__init__()
        # encoder
        self.Conv_down1 = Conv_down(in_channels, 64, p)
        self.Conv_down2 = Conv_down(64, 128, p)
        self.Conv_down3 = Conv_down(128, 256, p)
        self.Conv_down4 = Conv_down(256, 512, p)
        self.Conv_down5 = Conv_down(512, 1024, p)
        # decoder
        self.Conv_up1 = Conv_up(1024, 512, p)
        self.Conv_up2 = Conv_up(512, 256, p)
        self.Conv_up3 = Conv_up(256, 128, p)
        self.Conv_up4 = Conv_up(128, 64, p)
        # Conv_up5 is not called in forward(); it is kept so the module's
        # parameter names stay the same (presumably needed by saved weights).
        self.Conv_up5 = Conv_up(128, 64, p)
        # per-position class scores
        self.Conv_out = nn.Conv1d(64, out_channels, 1, padding=0, stride=1)
        self.Conv_final = nn.Conv1d(out_channels, out_channels, 1, padding=0, stride=1)

    def forward(self, x):
        """Return the class scores for every position of the input profiles."""
        x, skip1 = self.Conv_down1(x)
        x, skip2 = self.Conv_down2(x)
        x, skip3 = self.Conv_down3(x)
        x, skip4 = self.Conv_down4(x)
        # deepest stage: continue with the features before pooling
        _, x = self.Conv_down5(x)

        decoder_path = (
            (self.Conv_up1, skip4),
            (self.Conv_up2, skip3),
            (self.Conv_up3, skip2),
            (self.Conv_up4, skip1),
        )
        for up_stage, skip in decoder_path:
            x = up_stage(x, skip)

        return self.Conv_final(self.Conv_out(x))
dijkprofile-annotator/dijkprofile_annotator/preprocessing/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from .preprocessing import filepath_pair_to_labeled_sample
2
+ from .preprocessing import file_pairs_to_tensor_profiles
3
+ from .preprocessing import read_charpoints_file
4
+ from .preprocessing import read_surfaceline_file
5
+ from .preprocessing import make_height_profiles
6
+ from .preprocessing import make_labeled_height_profiles
7
+ from .preprocessing import get_file_pairs_from_dir
8
+ from .preprocessing import load_datasets
dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (571 Bytes). View file
 
dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/link_collector.cpython-39.pyc ADDED
Binary file (1.02 kB). View file
 
dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/preprocessing.cpython-39.pyc ADDED
Binary file (11.2 kB). View file
 
dijkprofile-annotator/dijkprofile_annotator/preprocessing/preprocessing.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import os
3
+ from operator import itemgetter
4
+
5
+ import numpy as np
6
+ from dijkprofile_annotator.config import (CLASS_DICT_FULL, CLASS_DICT_REGIONAL,
7
+ CLASS_DICT_SIMPLE,
8
+ CLASS_DICT_SIMPLE_BERM,
9
+ CLASS_DICT_SIMPLE_SLOOT)
10
+ from dijkprofile_annotator.dataset import DijkprofileDataset
11
+ from sklearn.model_selection import train_test_split
12
+
13
+
14
def read_surfaceline_file(surfaceline_fp):
    """Read surfaceline file and convert to dict.

    The first row is treated as a header and skipped. Every following row
    holds a location id followed by x;y;z triplets. Triplets with an empty
    field are skipped. Reading stops at the first coordinate that cannot be
    parsed; everything read up to that point is returned.

    Args:
        surfaceline_fp (string): path to the surfacelines file.

    Returns:
        dict: dict containing list of (x, y, z) points per location.
    """
    surfacelines = {}
    # newline='' is the documented way to open files for the csv module
    with open(surfaceline_fp, newline='') as csvfile:
        surfacereader = csv.reader(csvfile, delimiter=';', quotechar='|')
        next(surfacereader, None)  # skip header; an empty file yields no rows
        for row in surfacereader:
            if not row:
                # blank line in the file, there is no location to read
                continue
            location = row[0]
            points = surfacelines[location] = []
            for i in range(1, len(row) - 2, 3):
                raw_point = row[i:i + 3]
                # some files have empty points
                if '' in raw_point:
                    continue
                try:
                    x, y, z = (_parse_coordinate(value.replace('"', '')) for value in raw_point)
                except ValueError as e:
                    print(f"error reading point from surfaceline at location: {location} (index: {i}), error: {e}")
                    # stop reading the file, return what was read so far
                    return surfacelines
                points.append((x, y, z))
    return surfacelines
50
+
51
+
52
def read_charpoints_file(charlines_fp):
    """Read characteristicpoints file and convert to dict.

    The header row supplies the point names: the name of a point is the header
    of its first column with the first two characters removed. Triplets with
    an empty field are skipped. When a coordinate cannot be parsed, the rest
    of that row is still read and stored, after which reading stops.

    Args:
        charlines_fp (string): path to characteristicpoints file.

    Returns:
        dict: per location, a dict mapping point name to its (x, y, z) tuple.
    """
    charpoints = {}
    # newline='' is the documented way to open files for the csv module
    with open(charlines_fp, newline='') as csvfile:
        cpointsreader = csv.reader(csvfile, delimiter=';', quotechar='|')
        header = next(cpointsreader, None)
        if header is None:
            # empty file: nothing to read
            return charpoints
        for idx, row in enumerate(cpointsreader):
            if not row:
                # Skip rows without a location. Continuing here would store an
                # empty point dict under the previous row's location.
                print(f"couldn't read location in row: {row} at {idx}, file: {charlines_fp}")
                continue
            location = row[0]

            point_dict = {}
            stop_exec = False
            for i in range(1, len(row) - 2, 3):
                if row[i] == '' or row[i + 1] == '' or row[i + 2] == '':
                    continue
                try:
                    x = _parse_coordinate(row[i].replace('"', ''))
                    y = _parse_coordinate(row[i + 1].replace('"', ''))
                    z = _parse_coordinate(row[i + 2].replace('"', ''))

                    # strip the two-character axis prefix from the column name
                    point_dict[header[i][2:]] = (x, y, z)
                except ValueError as e:
                    print(
                        f"error reading point from characteristicpoints at location: {location} (index: {i}), error: {e}")
                    stop_exec = True

            charpoints[location] = point_dict
            if stop_exec:
                break
    return charpoints
90
+
91
+
92
+ def _parse_coordinate(coord):
93
+ """Convert string point coordinate to float, remove double dots if needed.
94
+ Some of the coordinates contain multiple dots, probably because someone
95
+ opened the file in excel and it formatted it weird. In all examples I've
96
+ seen the first point is only to indicate 1000's and can savely be removed
97
+
98
+ Args:
99
+ point (str): string representation of the number to parse
100
+
101
+ Returns:
102
+ float: float representation of the coordinate
103
+ """
104
+ try:
105
+ return float(coord)
106
+ except:
107
+ parts = coord.split(".")
108
+ return float("".join(parts[:-1]) + "." + parts[-1])
109
+
110
+
111
def make_height_profiles(surfaceline_dict, max_profile_size):
    """Make height arrays from surfacelines dict.

    The height is the third value of every point. Profiles shorter than
    ``max_profile_size`` are extended by repeating their last height; longer
    profiles are cut off. Locations without any point are left out of the
    result, since no profile can be built for them.

    Args:
        surfaceline_dict (dict): dict of surfacelines by location.
        max_profile_size (int): fixed max size for the height profile.

    Returns:
        dict: dict containing height profiles by location, each stored as
        ``{"profile": array}``.
    """
    profile_dict = {}
    for location, points in surfaceline_dict.items():
        if len(points) == 0:
            # indexing the height column of an empty array would raise
            continue
        heights = np.array(points)[:, 2].astype(np.float32)

        # we'll fit whole profile in a fixed length so that multiple profiles can be used as samples
        profile_length = heights.shape[0]
        if profile_length < max_profile_size:
            padded = np.zeros(max_profile_size)
            padded[:profile_length] = heights
            padded[profile_length:] = heights[-1]
            heights = padded
        else:
            heights = heights[:max_profile_size]
        profile_dict[location] = {"profile": heights}
    return profile_dict
136
+
137
+
138
def make_labeled_height_profiles(surfaceline_dict, cpoints_dict, max_profile_size, class_list='simple', require_all_points=True):
    """Make fixed-length height profiles and per-point labels.

    For every location the characteristic points are matched to the nearest
    surfaceline point, that point receives the class of the characteristic
    point, and the labels are then forward filled along the profile. Profile
    and labels are padded (with their last value) or truncated to
    ``max_profile_size``.

    Args:
        surfaceline_dict (dict): dict of surfacelines by location.
        cpoints_dict (dict): dict of characteristic points by location; a
            point equal to ``(-1.0, -1.0, -1.0)`` is treated as not set.
        max_profile_size (int): fixed size for the height profile.
        class_list (str): selection of classes to use, see config. One of
            'regional', 'simple', 'berm', 'sloot' or 'full' (case-insensitive).
        require_all_points (bool): skip locations that do not contain all
            the point types of the chosen class list.

    Returns:
        dict: ``{location: {'profile': float32 array, 'label': int32 array}}``.

    Raises:
        NotImplementedError: if ``class_list`` is not a known class list.
    """
    class_list = class_list.lower()
    if class_list == 'regional':
        class_dict = CLASS_DICT_REGIONAL
    elif class_list == 'simple':
        class_dict = CLASS_DICT_SIMPLE
    elif class_list == 'berm':
        class_dict = CLASS_DICT_SIMPLE_BERM
    elif class_list == 'sloot':
        class_dict = CLASS_DICT_SIMPLE_SLOOT
    elif class_list == 'full':
        class_dict = CLASS_DICT_FULL
    else:
        raise NotImplementedError(f"No class list available of type: {class_list}")

    # we don't want to require the empty class
    required_point_types = [point_type for point_type in class_dict if point_type != 'leeg']
    empty_point = (-1.0, -1.0, -1.0)

    profile_label_dict = {}
    for location, surfaceline in surfaceline_dict.items():
        # if no labels were given for this location, skip it
        if location not in cpoints_dict:
            continue
        cpoints = cpoints_dict[location]

        # skip the location if the required points are not all present
        if require_all_points:
            labeled_point_types = {key for key, value in cpoints.items() if value != empty_point}
            if not all(point_type in labeled_point_types for point_type in required_point_types):
                continue

        # convert once per location instead of once per characteristic point
        surface_points = np.array(surfaceline)
        heights = surface_points[:, 2].astype(np.float32)
        labels = np.zeros(len(heights))

        for key, point in cpoints.items():
            # only points that are set and belong to the class list get a label
            if point == empty_point or key not in class_dict:
                continue
            # rounding errors require matching by distance; argmin returns the
            # first closest surface point
            distances = np.linalg.norm(surface_points - np.array(point), axis=1)
            labels[int(np.argmin(distances))] = class_dict[key]

        # forward fill the labels
        for i in range(1, len(labels)):
            if labels[i] == 0.0:
                labels[i] = labels[i - 1]

        # fit the whole profile in a fixed length so that multiple profiles
        # can be used as samples
        profile_length = labels.shape[0]
        if profile_length < max_profile_size:
            padded_heights = np.zeros(max_profile_size)
            padded_labels = np.zeros(max_profile_size)
            padded_heights[:profile_length] = heights
            padded_labels[:profile_length] = labels
            padded_heights[profile_length:] = heights[profile_length - 1]
            padded_labels[profile_length:] = labels[profile_length - 1]
            heights = padded_heights
            labels = padded_labels
        else:
            heights = heights[:max_profile_size]
            labels = labels[:max_profile_size]

        profile_label_dict[location] = {
            'profile': heights.astype(np.float32),
            'label': labels.astype(np.int32),
        }
    return profile_label_dict
229
+
230
+
231
def filepath_pair_to_labeled_sample(source_surfacelines, source_characteristicpoints, max_profile_size=352, class_list='simple', require_all_points=True):
    """Read one surfacelines/characteristicpoints file pair and label it.

    Args:
        source_surfacelines (string): path to the surfacelines file.
        source_characteristicpoints (string): path to the characteristicpoints file.
        max_profile_size (int, optional): fixed size for the profile. Defaults to 352.
        class_list (str, optional): class mapping to use, see config. Defaults to 'simple'.
        require_all_points (bool, optional): passed on to
            make_labeled_height_profiles. Defaults to True.

    Returns:
        dict: dict containing height profile and labels by location.
    """
    surfacelines = read_surfaceline_file(source_surfacelines)
    charpoints = read_charpoints_file(source_characteristicpoints)
    return make_labeled_height_profiles(
        surfacelines,
        charpoints,
        max_profile_size,
        class_list=class_list,
        require_all_points=require_all_points)
253
+
254
+
255
def file_pairs_to_tensor_profiles(filepair_list, max_profile_size=352, class_list='simple', require_all_points=True):
    """Label every file pair and merge the results into one dict.

    When the same location occurs in more than one file pair, the entry of
    the later pair replaces the earlier one.

    Args:
        filepair_list (list): list of tuples containing the paths to the
            surfacelines and characteristicpoints files.
        max_profile_size (int, optional): fixed size for the profile. Defaults to 352.
        class_list (str, optional): class mapping to use, see config. Defaults to 'simple'.
        require_all_points (bool, optional): passed on to
            filepath_pair_to_labeled_sample. Defaults to True.

    Returns:
        dict: Dict containing all the height profiles and labels by location.
    """
    merged = {}
    for lines_fp, points_fp in filepair_list:
        merged.update(filepath_pair_to_labeled_sample(
            lines_fp,
            points_fp,
            max_profile_size,
            class_list,
            require_all_points=require_all_points))
    return merged
277
+
278
+
279
def get_file_pairs_from_dir(path, krp_format=False):
    """Recursively collect (lines file, points file) pairs below a directory.

    A pair is formed for every file whose name ends in ``lines.csv`` that has
    a ``characteristicpoints.csv`` next to it in the same directory.

    Args:
        path (str): root directory, searched recursively for pairs.
        krp_format (bool): the folder uses the csv naming convention of
            waterschap Vallei en Veluwe; the search is then delegated to
            _get_file_pairs_from_dir_krp.

    Returns:
        list: list of tuples where the first item is the path to the
            surfacelines csv and the second the path to the
            characteristicpoints.csv
    """
    if krp_format:
        return _get_file_pairs_from_dir_krp(path)

    pairs = []
    for dirpath, _, filenames in os.walk(path):
        points_fp = os.sep.join([dirpath, 'characteristicpoints.csv'])
        for filename in filenames:
            if not filename.endswith('lines.csv'):
                continue
            lines_fp = os.sep.join([dirpath, filename])
            if os.path.exists(lines_fp) and os.path.exists(points_fp):
                pairs.append((lines_fp, points_fp))
    return pairs
305
+
306
+
307
+ def _get_file_pairs_from_dir_krp(path):
308
+ """Recursively get all pairs of lines and points files in a directory but in the format used
309
+ by Waterschap Vallei en Veluwe, same functionality as get_file_pairs_from_dir.
310
+
311
+ Args:
312
+ path (str): path to the root directory containing the lines and points csv files,
313
+ directory is searched recursively for pairs
314
+
315
+ Returns:
316
+ list: list of tuples where the first item is the path to the surfacelines.csv and the second
317
+ the path to the characteristicpoints.csv
318
+ """
319
+ list_of_files = []
320
+ for (dirpath, _, filenames) in os.walk(path):
321
+ for filename in filenames:
322
+ if filename.endswith('.krp.csv'):
323
+ if os.path.exists(os.sep.join([dirpath, filename])) and \
324
+ os.path.exists(os.sep.join([dirpath, filename.split(".krp")[0] + ".csv"])):
325
+
326
+ list_of_files.append((
327
+ os.sep.join([dirpath, filename.split(".krp")[0] + ".csv"]),
328
+ os.sep.join([dirpath, filename])))
329
+ return list_of_files
330
+
331
+
332
def load_datasets(annotation_tuples, custom_scaler_path=None, test_size=0.2, max_profile_size=512, class_list='simple', require_all_points=True):
    """Build a train and a validation dataset from annotated file pairs.

    All profiles are loaded and labelled first; their locations are then
    split at random into a train and a test part.

    Args:
        annotation_tuples ([(str,str)]): list of tuples of filepaths to the lines and points files.
        custom_scaler_path (str, optional): path to a custom scaler, passed to both datasets. Defaults to None.
        test_size (float, optional): test size given to train_test_split. Defaults to 0.2.
        max_profile_size (int, optional): fixed profile size. Defaults to 512.
        class_list (str, optional): class mapping to use. Defaults to 'simple'.
        require_all_points (bool, optional): whether to drop profiles that don't contain all points in the mapping. Defaults to True.

    Returns:
        DijkprofileDataset, DijkprofileDataset: train and validation datasets.
    """
    profiles = file_pairs_to_tensor_profiles(
        annotation_tuples,
        max_profile_size=max_profile_size,
        class_list=class_list,
        require_all_points=require_all_points)

    # split on location ids; both datasets share the same profile dict
    location_ids = list(profiles)
    train_ids, validation_ids = train_test_split(location_ids, shuffle=True, test_size=test_size)

    return (
        DijkprofileDataset(profiles, train_ids, custom_scaler_path=custom_scaler_path),
        DijkprofileDataset(profiles, validation_ids, custom_scaler_path=custom_scaler_path),
    )
dijkprofile-annotator/dijkprofile_annotator/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ joblib==1.1.0
2
+ matplotlib==3.5.0
3
+ numpy==1.21.4
4
+ Pillow==8.4.0
5
+ scikit_learn==1.0.1
6
+ seaborn==0.11.2
7
+ torch==1.10.0
dijkprofile-annotator/dijkprofile_annotator/training/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .train import train
2
+ from .train import get_loss_train
3
+ from .train import accuracy_check
4
+ from .train import accuracy_check_for_batch
dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (367 Bytes). View file
 
dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/train.cpython-39.pyc ADDED
Binary file (5.97 kB). View file
 
dijkprofile-annotator/dijkprofile_annotator/training/train.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dijkprofile_annotator.preprocessing as preprocessing
2
+ import dijkprofile_annotator.utils as utils
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ from dijkprofile_annotator.models import Dijknet
7
+ from PIL import Image
8
+ from torch.utils.data import DataLoader
9
+ from tqdm import tqdm
10
+
11
+
12
def get_loss_train(model, data_train, criterion):
    """Compute mean accuracy and mean loss per batch over a data loader.

    The model is switched to eval mode and every batch is evaluated without
    gradient tracking.

    Args:
        model (): model to use for prediction
        data_train (torch.utils.data.DataLoader): Dataloader containing the
            profiles and labels
        criterion (pytorch loss function, probably nn.CrossEntropyLoss): loss
            function to be used.

    Returns:
        float: accuracy averaged over the batches
        float: loss averaged over the batches

    Raises:
        ValueError: if ``data_train`` yields no batches.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.eval()
    total_acc = 0
    total_loss = 0
    n_batches = 0
    with torch.no_grad():
        for profile, masks in data_train:
            profile = torch.Tensor(profile).to(device)
            # NOTE(review): torch.Tensor(...) yields a float tensor; confirm the
            # criterion used here accepts float targets.
            masks = torch.Tensor(masks).to(device)
            outputs = model(profile)
            loss = criterion(outputs, masks)
            preds = torch.argmax(outputs, dim=1).float()
            total_acc += accuracy_check_for_batch(masks.cpu(), preds.cpu(), profile.size()[0])
            total_loss += loss.cpu().item()
            n_batches += 1
    if n_batches == 0:
        # previously this surfaced as an UnboundLocalError on the loop variable
        raise ValueError("data_train yielded no batches")
    return total_acc / n_batches, total_loss / n_batches
40
+
41
+
42
def accuracy_check(mask, prediction):
    """Return the fraction of elements where prediction equals mask.

    Both arguments are converted to numpy arrays first: a string is opened
    as an image file, a PIL image is converted directly and a torch tensor
    is detached and moved to the CPU. Anything else is used as is.

    Args:
        mask (torch.Tensor, PIL Image or str): labels
        prediction (torch.Tensor, PIL Image or str): predictions

    Returns:
        float: number of equal elements divided by the number of elements
            in mask.
    """
    arrays = []
    for item in (mask, prediction):
        if isinstance(item, str):
            item = np.array(Image.open(item))
        elif isinstance(item, torch.Tensor):
            # detach/cpu so tensors that track gradients or live on the GPU work too
            item = item.detach().cpu().numpy()
        elif isinstance(item, Image.Image):
            item = np.array(item)
        arrays.append(item)

    n_equal = np.sum(np.equal(arrays[0], arrays[1]))
    return n_equal / len(arrays[0].flatten())
67
+
68
+
69
def accuracy_check_for_batch(masks, predictions, batch_size):
    """Average accuracy_check over the first ``batch_size`` samples.

    Args:
        masks (torch.Tensor): labels
        predictions (torch.Tensor): predictions
        batch_size (int): number of samples to compare.

    Returns:
        float: mean of the per-sample accuracies.
    """
    per_sample = (
        accuracy_check(masks[sample], predictions[sample])
        for sample in range(batch_size)
    )
    return sum(per_sample) / batch_size
84
+
85
+
86
def _mean_batch_accuracy(outputs, labels_cpu, batch_size, profile_length):
    """Mean fraction of correctly classified points over one batch."""
    flat_output = torch.argmax(outputs, dim=1).cpu().reshape(batch_size, 1, -1)
    compare = flat_output == labels_cpu
    acc = np.sum(compare.numpy(), axis=2) / int(profile_length)
    return np.mean(acc, axis=0)[0]


def train(annotation_tuples,
          epochs=100,
          batch_size_train=32,
          batch_size_val=512,
          num_workers=6,
          custom_scaler_path=None,
          class_list='simple',
          test_size=0.2,
          max_profile_size=512,
          shuffle=True):
    """Train a Dijknet model on annotated profiles.

    Datasets are loaded from the given file pairs, a Dijknet with one input
    channel and one output channel per class is built and optimised with
    Adam (lr=0.001) on a class-weighted cross entropy loss. After every
    epoch the training loss, validation loss, validation accuracy and the
    validation accuracy after isotonic post-processing are printed.

    Args:
        annotation_tuples ([(str,str)]): list of tuples of filepaths to the
            lines and points files, passed to preprocessing.load_datasets.
        epochs (int, optional): number of passes over the train set. Defaults to 100.
        batch_size_train (int, optional): batch size for training; an
            incomplete last batch is dropped. Defaults to 32.
        batch_size_val (int, optional): batch size for validation. Defaults to 512.
        num_workers (int, optional): worker count for both data loaders. Defaults to 6.
        custom_scaler_path (str, optional): path to a custom scaler for the datasets. Defaults to None.
        class_list (str, optional): class mapping used for the labels, the
            class weights and the number of outputs. Defaults to 'simple'.
        test_size (float, optional): share of profiles held out for validation. Defaults to 0.2.
        max_profile_size (int, optional): fixed profile size. Defaults to 512.
        shuffle (bool, optional): shuffle the train loader. Defaults to True.

    Raises:
        NotImplementedError: when given class_list is not implemented

    Returns:
        Dijknet: trained Dijknet model.
    """
    print(f"loading datasets")
    # class_list must reach the datasets, otherwise the labels use the default
    # mapping while the model is sized for the requested one
    train_dataset, test_dataset = preprocessing.load_datasets(annotation_tuples,
                                                              custom_scaler_path=custom_scaler_path,
                                                              test_size=test_size,
                                                              max_profile_size=max_profile_size,
                                                              class_list=class_list)
    print(f"loaded datasets:")
    print(f" train: {len(train_dataset)} samples")
    print(f" test: {len(test_dataset)} samples")

    class_dict, _, class_weights = utils.get_class_dict(class_list)

    print(f"constructing model with {len(class_dict)} output classes")
    model = Dijknet(1, len(class_dict))

    # parameters; drop_last skips the incomplete final training batch
    train_params = {'batch_size': batch_size_train,
                    'shuffle': shuffle,
                    'num_workers': num_workers,
                    'drop_last': True}

    params_val = {'batch_size': batch_size_val,
                  'shuffle': False,
                  'num_workers': num_workers}

    training_generator = DataLoader(train_dataset, **train_params)
    validation_generator = DataLoader(test_dataset, **params_val)

    # CUDA for PyTorch
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # loss
    criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor(class_weights).to(device))

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    print("starting training.")
    # Loop over epochs
    for epoch in range(epochs):
        print("epoch: {}".format(epoch))
        # Training
        loss_list = []
        model.train()
        for local_batch, local_labels in tqdm(training_generator):
            # Transfer to GPU
            local_batch, local_labels = local_batch.to(device), local_labels.to(device).long()

            # Model computations
            outputs = model(local_batch)
            local_labels = local_labels.reshape(local_labels.shape[0], -1)

            loss = criterion(outputs, local_labels)
            optimizer.zero_grad()
            loss.backward()

            # Update weights
            optimizer.step()
            loss_list.append(loss.detach().cpu().numpy())

        # report average loss over epoch
        print("training loss: ", np.mean(loss_list))

        # Validation
        model.eval()
        batch_accuracies = []
        batch_accuracies_iso = []
        batch_loss_val = []
        with torch.no_grad():  # no graphs are needed for validation
            for local_batch, local_labels in validation_generator:
                # get new batches
                local_batch, local_labels = local_batch.to(device), local_labels.to(device).long()

                # Model computations
                outputs = model(local_batch)

                # calc loss
                loss = criterion(outputs, local_labels.reshape(local_labels.shape[0], -1))
                batch_loss_val.append(loss.detach().cpu().numpy())

                labels_cpu = local_labels.cpu()
                n_samples = local_batch.shape[0]
                profile_length = local_batch.shape[-1]

                # accuracy of the raw predictions, taken before any post-processing
                batch_accuracies.append(
                    _mean_batch_accuracy(outputs, labels_cpu, n_samples, profile_length))

                # hand over a copy: detach().cpu() inside the helper can share
                # storage with a CPU tensor, and the helper assigns into it
                outputs_iso = utils.force_sequential_predictions(outputs.clone(), method='isotonic')
                batch_accuracies_iso.append(
                    _mean_batch_accuracy(outputs_iso, labels_cpu, n_samples, profile_length))

        print("validation accuracy: {}".format(np.mean(batch_accuracies)))
        print("validation accuracy isotonic regression: {}".format(np.mean(batch_accuracies_iso)))
        print("validation loss: {}".format(np.mean(batch_loss_val)))
        print("="*50)

    return model
dijkprofile-annotator/dijkprofile_annotator/utils/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from .utils import extract_img
2
+ from .utils import ffill
3
+ from .utils import visualize_sample
4
+ from .utils import visualize_prediction
5
+ from .utils import visualize_files
6
+ from .utils import visualize_dict
7
+ from .utils import train_scaler
8
+ from .utils import get_class_dict
9
+ from .utils import force_sequential_predictions
dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (528 Bytes). View file
 
dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/utils.cpython-39.pyc ADDED
Binary file (10.3 kB). View file
 
dijkprofile-annotator/dijkprofile_annotator/utils/utils.py ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ from collections import defaultdict
4
+
5
+ import dijkprofile_annotator.preprocessing as preprocessing
6
+ import dijkprofile_annotator.config as config
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import seaborn as sns
10
+ import torch
11
+ import torch.nn.functional as F
12
+ from sklearn.isotonic import IsotonicRegression
13
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler
14
+
15
+
16
def extract_img(size, in_tensor):
    """Cut a window of length ``size`` from the middle of the third axis.

    Args:
        size(int) : size of cut
        in_tensor(tensor) : tensor to be cut

    Returns:
        tensor: the slice ``in_tensor[:, :, start:stop]``.
    """
    margin = (in_tensor.size()[2] - size) / 2
    start = int(margin)
    stop = int(size + margin)
    return in_tensor[:, :, start:stop]
25
+
26
+
27
def ffill(arr):
    """Forward fill NaN values along axis 1.

    Every NaN is replaced by the last non-NaN value to its left in the same
    row; NaNs at the start of a row take the value at column 0 (which may
    itself be NaN).

    Args:
        arr (np.array): numpy array to fill

    Returns:
        np.array: filled array.
    """
    missing = np.isnan(arr)
    # per position: its own column index when valid, 0 when missing
    last_valid = np.where(missing, 0, np.arange(missing.shape[1]))
    # the running maximum turns that into "index of the latest valid column"
    last_valid = np.maximum.accumulate(last_valid, axis=1)
    rows = np.arange(last_valid.shape[0])[:, None]
    return arr[rows, last_valid]
41
+
42
def train_scaler(profile_dict, scaler_type='minmax'):
    """Fit a scaler on all height values of a profile dict.

    The profiles are stacked and flattened to a single feature column, so
    one scaling is learned for all heights together.

    Args:
        profile_dict (dict): dict containing the profile heights and labels;
            every value must have a 'profile' entry of the same length.
        scaler_type (str, optional): 'minmax' for a MinMaxScaler with range
            (-1, 1) or 'standard' for a StandardScaler. Defaults to 'minmax'.

    Returns:
        sklearn MinMaxScaler or StandardScaler: fitted scaler in sklearn format

    Raises:
        NotImplementedError: if ``scaler_type`` is not 'minmax' or 'standard'.
        ValueError: if ``profile_dict`` is empty or the profiles differ in length.
    """
    if scaler_type == 'minmax':
        scaler = MinMaxScaler(feature_range=(-1, 1))  # for neural networks -1,1 is better than 0,1
    elif scaler_type == 'standard':
        scaler = StandardScaler()
    else:
        # report the requested type; no scaler object exists in this branch
        raise NotImplementedError(f"no scaler: {scaler_type}")

    accumulator = np.stack([
        np.asarray(entry['profile'], dtype=np.float64)
        for entry in profile_dict.values()
    ])

    scaler.fit(accumulator.reshape(-1, 1))
    return scaler
65
+
66
+
67
def get_class_dict(class_list):
    """Look up the class mapping, its inverse and the class weights in config.

    The name is matched case-insensitively.

    Args:
        class_list (string): string representing the class mappings to use

    Raises:
        NotImplementedError: raised if a class mapping that is not
            implemented is passed

    Returns:
        (dict,dict,list): dict with class mappings, inverse of that dict, weights for each class.
    """
    class_list = class_list.lower()
    if class_list == 'regional':
        return (config.CLASS_DICT_REGIONAL,
                config.INVERSE_CLASS_DICT_REGIONAL,
                config.WEIGHT_DICT_REGIONAL)
    if class_list == 'simple':
        return (config.CLASS_DICT_SIMPLE,
                config.INVERSE_CLASS_DICT_SIMPLE,
                config.WEIGHT_DICT_SIMPLE)
    if class_list == 'berm':
        return (config.CLASS_DICT_SIMPLE_BERM,
                config.INVERSE_CLASS_DICT_SIMPLE_BERM,
                config.WEIGHT_DICT_SIMPLE_BERM)
    if class_list == 'sloot':
        return (config.CLASS_DICT_SIMPLE_SLOOT,
                config.INVERSE_CLASS_DICT_SIMPLE_SLOOT,
                config.WEIGHT_DICT_SIMPLE_SLOOT)
    if class_list == 'full':
        return (config.CLASS_DICT_FULL,
                config.INVERSE_CLASS_DICT_FULL,
                config.WEIGHT_DICT_FULL)
    raise NotImplementedError(f"No configs found for class list of type: {class_list}")
103
+
104
+
105
def force_sequential_predictions(predictions, method='isotonic'):
    """Force the classes in each sample to only go up from left to right.

    This makes sense because a higher class could never be left of a lower
    class in the representation chosen here. Two methods are available:

    * 'isotonic': fit an isotonic regression on the predicted class per
      position and round the result.
    * 'first': walk through the classes in ascending order and keep, per
      class, the first run that starts at or after the end of the run kept
      for the previous class; remaining gaps are forward filled.

    The input tensor is not modified; a new CPU tensor is returned.

    Args:
        predictions (torch.Tensor): Tensor output of the model in shape (batch_size, channel_size, sample_size)
        method (str, optional): method to use for enforcing the sequentiality. Defaults to 'isotonic'.

    Raises:
        NotImplementedError: if the given method is not implemented

    Returns:
        torch.Tensor: one-hot tensor in the same shape as the input, with only increasing classes from left to right.
    """
    # detach() shares storage with its input and cpu() returns the same tensor
    # when it is already on the CPU, so copy before assigning into it.
    predictions = predictions.detach().cpu().clone()
    n_classes = predictions.shape[1]  # 1 is the channel dimension
    if method == 'first':
        # loop over batch
        for j in range(predictions.shape[0]):
            pred = torch.argmax(predictions[j], dim=0).tolist()
            if not pred:
                continue

            # construct dict of groups of start-end indices for class
            groups = defaultdict(list)
            group_start_idx = 0
            for i in range(1, len(pred)):
                if pred[i] != pred[group_start_idx]:
                    groups[pred[group_start_idx]].append((group_start_idx, i))
                    group_start_idx = i
            # the run that reaches the end of the profile must be recorded
            # too, otherwise the last class is lost
            groups[pred[group_start_idx]].append((group_start_idx, len(pred)))

            # if the class occurs again later in the profile
            # discard this occurrence of it
            new_pred = torch.zeros(len(pred), dtype=torch.int64)
            last_index = 0
            for class_n, group_tuples in sorted(groups.items()):
                for start, end in group_tuples:
                    if start >= last_index:
                        new_pred[start:end] = class_n
                        last_index = end
                        break

            # simple forward fill
            for i in range(1, len(new_pred)):
                if new_pred[i] == 0:
                    new_pred[i] = new_pred[i - 1]

            # encode back to one-hot tensor
            predictions[j] = F.one_hot(new_pred, num_classes=n_classes).permute(1, 0)
    elif method == 'isotonic':
        for i in range(predictions.shape[0]):
            pred = torch.argmax(predictions[i], dim=0)

            x = np.arange(0, len(pred))
            iso_reg = IsotonicRegression().fit(x, pred)
            new_pred = iso_reg.predict(x)
            new_pred = np.round(new_pred)

            # encode back to one-hot tensor
            new_pred = F.one_hot(torch.Tensor(new_pred).to(torch.int64), num_classes=n_classes).permute(1, 0)
            predictions[i] = new_pred
    else:
        raise NotImplementedError(f"Unknown method: {method}")

    return predictions
172
+
173
+
174
+
175
def visualize_prediction(heights, prediction, labels, location_name, class_list):
    """Plot a profile with its labelled and its predicted class boundaries.

    A vertical line and a text are drawn where a non-zero label class
    starts (loose dashes) and where a non-zero predicted class starts for
    the first time (small dots).

    Args:
        heights (tensor): tensor containing the heights data of the profile
        prediction (tensor): tensor containing the predicted data of the
            profile; 2-D and 3-D input is reduced with argmax over the
            channel axis
        labels (tensor): tensor containing the labels for each height point in heights
        location_name (str): name of the profile, just for visualization
        class_list (str): class mapping to use, determines which labels are visualized
    """
    class_dict, inverse_class_dict, _ = get_class_dict(class_list)
    plt.subplots(figsize=(20,11))
    plt.title(location_name)
    plt.plot(heights, label='profile')

    # change one-hot batched format to list of classes
    if prediction.dim() == 3:
        prediction = torch.argmax(torch.squeeze(prediction, dim=0), dim=0)
    if prediction.dim() == 2:
        # assuming channel first representation
        prediction = torch.argmax(prediction, dim=0)
    predicted_classes = prediction.detach().cpu().numpy()

    # texts start at the lowest height and move up a fixed step per text
    lowest = np.min(heights)
    text_y = lowest
    text_step = (np.max(heights) - lowest) / (len(np.unique(labels)) * 2)

    palette = sns.color_palette("Set2", len(set(class_dict.values())))

    # plot actual labels
    previous = 999
    for position, class_n in enumerate(labels):
        if class_n == 0 or class_n == previous:
            continue
        plt.axvline(position, 0,5, color=palette[class_n], linestyle=(0,(5,10)))  # loose dashes
        plt.text(position, text_y, inverse_class_dict[class_n], rotation=0)
        text_y += text_step
        previous = class_n

    # plot predicted points; a class that was left once is not drawn again
    finished_classes = []
    previous = 999
    for position, class_n in enumerate(predicted_classes):
        if class_n == 0 or class_n in finished_classes:
            continue
        if class_n == previous:
            continue
        plt.axvline(position, 0,5, color=palette[class_n], linestyle=(0,(1,1)))  # small dots
        plt.text(position, text_y, "predicted " + inverse_class_dict[class_n], rotation=0)
        text_y += text_step
        finished_classes.append(previous)
        previous = class_n

    plt.show()
230
+
231
+
232
def visualize_sample(heights, labels, location_name, class_list):
    """Plot a profile with a marker at the start of every labelled class.

    Args:
        heights (tensor): tensor containing the heights data of the profile
        labels (tensor): tensor containing the labels for each height point in heights
        location_name (str): name of the profile, just for visualization
        class_list (str): class mapping to use, determines which labels are visualized
    """
    class_dict, inverse_class_dict, _ = get_class_dict(class_list)
    plt.subplots(figsize=(20,11))
    plt.title(location_name)
    plt.plot(heights, label='profile')

    # texts start at the lowest height and move up a fixed step per text
    lowest = np.min(heights)
    text_y = lowest
    text_step = (np.max(heights) - lowest) / (len(np.unique(labels)) * 2)

    palette = sns.color_palette("Set2", len(set(class_dict.values())))

    # plot actual labels, class 0 is never drawn
    previous = 999
    for position, class_n in enumerate(labels):
        if class_n == 0 or class_n == previous:
            continue
        plt.axvline(position, 0,5, color=palette[class_n], linestyle=(0,(5,10)))  # loose dashes
        plt.text(position, text_y, inverse_class_dict[class_n], rotation=0)
        text_y += text_step
        previous = class_n

    plt.show()
265
+
266
def visualize_files(linesfp, pointsfp, max_profile_size=512, class_list='simple', location_index=0, return_dict=False):
    """Load a lines/points file pair and plot one of its labelled profiles.

    Args:
        linesfp (str): path to surfacelines file.
        pointsfp (str): path to points file.
        max_profile_size (int, optional): cutoff size of the profile, can leave on default here. Defaults to 512.
        class_list (str, optional): class mapping to use. Defaults to 'simple'.
        location_index (int, optional): index of profile to visualize. Defaults to 0.
        return_dict (bool, optional): return the profile dict for faster visualization. Defaults to False.

    Returns:
        [dict, optional]: profile dict containing the profiles of the given
            files, only when ``return_dict`` is set.
    """
    profiles = preprocessing.filepath_pair_to_labeled_sample(
        linesfp,
        pointsfp,
        max_profile_size=max_profile_size,
        class_list=class_list)

    # the drawing itself is the same as for an already loaded profile dict
    visualize_dict(profiles, class_list=class_list, location_index=location_index)

    if return_dict:
        return profiles
315
+
316
def visualize_dict(profile_label_dict, class_list='simple', location_index=0):
    """Plot one labelled profile from a profile dict, selected by index.

    Args:
        profile_label_dict (dict): dict containing profiles and labels
        class_list (str, optional): class_mapping to use for visualization. Defaults to 'simple'.
        location_index (int, optional): specifies the index of the profile to visualize. Defaults to 0.
    """
    location_name = list(profile_label_dict.keys())[location_index]
    sample = profile_label_dict[location_name]
    heights = sample['profile']
    labels = sample['label']

    class_dict, inverse_class_dict, _ = get_class_dict(class_list)
    plt.subplots(figsize=(20,11))
    plt.title(location_name)
    plt.plot(heights, label='profile')

    # texts start at the lowest height and move up a fixed step per text
    lowest = np.min(heights)
    text_y = lowest
    text_step = (np.max(heights) - lowest) / len(np.unique(labels))

    palette = sns.color_palette("Set2", len(set(class_dict.values())))

    # plot actual labels, class 0 is never drawn
    previous = 999
    for position, class_n in enumerate(labels):
        if class_n == 0 or class_n == previous:
            continue
        plt.axvline(position, 0,5, color=palette[class_n], linestyle=(0,(5,10)))  # loose dashes
        plt.text(position, text_y, inverse_class_dict[class_n], rotation=0)
        text_y += text_step
        previous = class_n

    plt.show()
dijkprofile-annotator/dijkprofile_annotator/web/.gitkeep ADDED
File without changes
dijkprofile-annotator/dijkprofile_annotator/web/app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import math
3
+ import gradio as gr
4
+ import dijkprofile_annotator
5
+ from zipfile import ZipFile
6
+
7
def annotate_file(file_objects, model_type):
    """Annotate the uploaded surfaceline files and return the path of the result.

    Every uploaded file is passed to ``dijkprofile_annotator.annotate`` and its
    output is written to ``/tmp/characteristicpoints_<i>.csv``, where ``<i>`` is
    the position of the file in ``file_objects``.

    Args:
        file_objects: iterable of uploaded file objects; only their ``.name``
            attribute is used, as the input path for the annotator.
        model_type: model name selected in the UI. It is only logged for now,
            see the TODO below.

    Returns:
        str: path of the generated csv when exactly one file was given,
        otherwise the path of a zip archive holding all generated csv files.
    """
    # Function-scope import so this block does not depend on the module header.
    # shutil.get_terminal_size() falls back to a default size, whereas
    # os.get_terminal_size() raises OSError when stdout is not attached to a
    # terminal, which is the usual situation when this runs as a web app.
    import shutil

    # TODO: actually use different model types based on selected model, only a well trained dijk model is available now.
    terminal_width = shutil.get_terminal_size().columns

    banner = "Starting processing of files."
    padding = "=" * ((terminal_width - len(banner)) // 2)
    print(padding + banner + padding)

    generated_charfiles = []
    for i, file_obj in enumerate(file_objects):
        # NOTE(review): these fixed /tmp paths are reused by every call, so
        # overlapping requests would overwrite each other's output - confirm
        # whether the app can serve requests concurrently.
        target_filepath = f"/tmp/characteristicpoints_{i}.csv"
        print(f" Processing file '{file_obj.name}', model '{model_type}', saving to '{target_filepath}'")
        dijkprofile_annotator.annotate(file_obj.name, target_filepath, device='cpu')
        generated_charfiles.append(target_filepath)
        print(f" finished processing: {file_obj.name}! saved to : {target_filepath}")
        print(" ", "-" * (terminal_width - 5))

    print("finished with all processing!")

    # return the csv file if only 1 file was given, return a zip otherwise.
    if len(generated_charfiles) == 1:
        print(f"returning file: {generated_charfiles[0]}")
        return generated_charfiles[0]

    return_zipfile = "/tmp/characterist_points.zip"
    with ZipFile(return_zipfile, 'w') as zip_obj:
        for filepath in generated_charfiles:
            zip_obj.write(filepath)
    print(f"returning file: {return_zipfile}")
    return return_zipfile
33
+
34
# Dutch, user-facing explanation shown in the web interface.
description = (
    "Upload een surfacelines.csv bestand in QDAMEdit format en krijg een annotatie file in characteristicpoints format terug \n"
    "Een neural netwerk gebaseerd op image segmentation heeft geleerd op basis van ~6000 geannoteerde profielen om zo goed mogelijk automatisch de punten te plaatsen op de profielen.\n"
    "Er zijn meerdere modellen beschikbaar om de annotatie te genereren, het 'dijk' model probeert alleen de dijk te vinden, het 'dijk+sloot' model zoekt ook naar een sloot en het 'volledig' model "
    "probeert zo veel mogelijk van de punten beschikbaar in het QDAMEdit format te vinden. Probeer eerst het 'dijk' model aangezien hier de consistentste resultaten uit komen."
)

# Input widgets: one or more surfaceline files plus the model selector.
_file_input = gr.inputs.File(
    file_count="multiple",
    type="file",
    label="te annoteren surfacelines files",
    optional=False,
)
_model_input = gr.inputs.Dropdown(
    ['dijk', 'dijk+sloot', "volledig"],
    type="value",
    default=None,
    label='Model type',
)

# Output widget: the generated csv or zip file returned by annotate_file.
_file_output = gr.outputs.File(label="gegenereerde file")

iface = gr.Interface(
    fn=annotate_file,
    title="Dijkprofiel Annotator",
    description=description,
    inputs=[_file_input, _model_input],
    outputs=_file_output,
)
iface.launch()
dijkprofile-annotator/output/models/.gitkeep ADDED
File without changes
dijkprofile-annotator/output/models/scaler.pik ADDED
Binary file (611 Bytes). View file
 
dijkprofile-annotator/output/reports/.gitkeep ADDED
File without changes
dijkprofile-annotator/output/visualizations/.gitkeep ADDED
File without changes
dijkprofile-annotator/run/configs/.gitkeep ADDED
File without changes
dijkprofile-annotator/setup.cfg ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [pycodestyle]
2
+ max-line-length = 120
3
+ exclude = .ipynb_checkpoints
4
+
5
+ [pep8]
6
+ max-line-length = 120
7
+ exclude = .ipynb_checkpoints
8
+
9
+ [flake8]
10
+ max-line-length = 120
11
+ exclude = .ipynb_checkpoints
12
+ max-complexity = 10
dijkprofile-annotator/setup.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup, find_packages
2
+
3
# Read the README with a context manager so the file handle is closed again,
# and with an explicit encoding so the result does not depend on the
# platform's default locale.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(name='dijkprofile_annotator',
      version='0.1.0',
      description='Automatically annotate dijkprofile in qDAMEdit format',
      long_description=long_description,
      # The README is Markdown; without this the text is treated as reStructuredText.
      long_description_content_type='text/markdown',
      url='',
      author='Jonathan Gerbscheid',
      author_email='j.gerbscheid@hetwaterschapshuis.nl',
      license='MIT',
      package_dir={"dijkprofile_annotator": "dijkprofile_annotator"},
      packages=find_packages(),
      zip_safe=False,
      install_requires=["joblib",
                        "matplotlib",
                        "numpy",
                        "pillow",
                        "scikit_learn>=1.0.1",
                        "seaborn",
                        "torch>=1.9.0"]
      )
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ joblib==1.1.0
2
+ matplotlib==3.5.0
3
+ numpy==1.21.4
4
+ Pillow==8.4.0
5
+ scikit_learn==1.0.1
6
+ seaborn==0.11.2
7
+ torch==1.10.0
8
+ ./dijkprofile-annotator