Spaces: dijkprofile-annotator (status: Runtime error)

jgerbscheid committed · Commit b9bac12 · 1 Parent(s): bcb2589
initial commit

Files changed:
- README.md +4 -32
- app.py +45 -0
- dijkprofile-annotator/LICENSE +21 -0
- dijkprofile-annotator/conftest.py +10 -0
- dijkprofile-annotator/dijkprofile_annotator/__init__.py +13 -0
- dijkprofile-annotator/dijkprofile_annotator/__pycache__/__init__.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/__pycache__/annotator.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/__pycache__/config.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/annotator.py +98 -0
- dijkprofile-annotator/dijkprofile_annotator/config.py +169 -0
- dijkprofile-annotator/dijkprofile_annotator/data/trained_models/dijknet_simple_95.pt +3 -0
- dijkprofile-annotator/dijkprofile_annotator/data/trained_models/scaler.pik +0 -0
- dijkprofile-annotator/dijkprofile_annotator/dataset/__init__.py +1 -0
- dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/__init__.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/dataset.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/dataset/dataset.py +42 -0
- dijkprofile-annotator/dijkprofile_annotator/models/__init__.py +1 -0
- dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/__init__.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/dijknet.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/models/dijknet.py +109 -0
- dijkprofile-annotator/dijkprofile_annotator/preprocessing/__init__.py +8 -0
- dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/__init__.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/link_collector.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/preprocessing.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/preprocessing/preprocessing.py +355 -0
- dijkprofile-annotator/dijkprofile_annotator/requirements.txt +7 -0
- dijkprofile-annotator/dijkprofile_annotator/training/__init__.py +4 -0
- dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/__init__.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/train.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/training/train.py +219 -0
- dijkprofile-annotator/dijkprofile_annotator/utils/__init__.py +9 -0
- dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/utils.cpython-39.pyc +0 -0
- dijkprofile-annotator/dijkprofile_annotator/utils/utils.py +350 -0
- dijkprofile-annotator/dijkprofile_annotator/web/.gitkeep +0 -0
- dijkprofile-annotator/dijkprofile_annotator/web/app.py +45 -0
- dijkprofile-annotator/output/models/.gitkeep +0 -0
- dijkprofile-annotator/output/models/scaler.pik +0 -0
- dijkprofile-annotator/output/reports/.gitkeep +0 -0
- dijkprofile-annotator/output/visualizations/.gitkeep +0 -0
- dijkprofile-annotator/run/configs/.gitkeep +0 -0
- dijkprofile-annotator/setup.cfg +12 -0
- dijkprofile-annotator/setup.py +21 -0
- requirements.txt +8 -0
README.md
CHANGED
@@ -1,37 +1,9 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: dijkprofile-annotator
+emoji: 💦
+colorFrom: blue
+colorTo: indigo
 sdk: gradio
 app_file: app.py
 pinned: false
 ---
-
-# Configuration
-
-`title`: _string_
-Display title for the Space
-
-`emoji`: _string_
-Space emoji (emoji-only character allowed)
-
-`colorFrom`: _string_
-Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
-
-`colorTo`: _string_
-Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
-
-`sdk`: _string_
-Can be either `gradio` or `streamlit`
-
-`sdk_version` : _string_
-Only applicable for `streamlit` SDK.
-See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
-
-`app_file`: _string_
-Path to your main application file (which contains either `gradio` or `streamlit` Python code).
-Path is relative to the root of the repository.
-
-`pinned`: _boolean_
-Whether the Space stays on top of your list.
app.py
ADDED
@@ -0,0 +1,45 @@
import os
import math
import gradio as gr
import dijkprofile_annotator
from zipfile import ZipFile

def annotate_file(file_objects, model_type):
    # TODO: actually use different model types based on selected model, only a well trained dijk model is available now.
    generated_charfiles = []
    str1 = "Starting processing of files."
    pad1 = math.floor((os.get_terminal_size().columns - len(str1)) / 2) * "="
    print(pad1 + "Starting processing of files." + pad1)
    for i, file_obj in enumerate(file_objects):
        target_filepath = f"/tmp/characteristicpoints_{i}.csv"
        print(f" Processing file '{file_obj.name}', model '{model_type}', saving to '{target_filepath}'")
        dijkprofile_annotator.annotate(file_obj.name, target_filepath, device='cpu')
        generated_charfiles.append(target_filepath)
        print(f" finished processing: {file_obj.name}! saved to: {target_filepath}")
        print(" ", "-" * (os.get_terminal_size().columns - 5))

    print("finished with all processing!")
    # return the csv file if only 1 file was given, return a zip otherwise.
    if len(generated_charfiles) == 1:
        print(f"returning file: {generated_charfiles[0]}")
        return generated_charfiles[0]
    else:
        return_zipfile = "/tmp/characterist_points.zip"
        with ZipFile(return_zipfile, 'w') as zipObj:
            for filepath in generated_charfiles:
                zipObj.write(filepath)
        print(f"returning file: {return_zipfile}")
        return return_zipfile

description = "Upload een surfacelines.csv bestand in QDAMEdit format en krijg een annotatie file in characteristicpoints format terug \n" +\
    "Een neural netwerk gebaseerd op image segmentation heeft geleerd op basis van ~6000 geannoteerde profielen om zo goed mogelijk automatisch de punten te plaatsen op de profielen.\n" +\
    "Er zijn meerdere modellen beschikbaar om de annotatie te genereren, het 'dijk' model probeert alleen de dijk te vinden, het 'dijk+sloot' model zoekt ook naar een sloot en het 'volledig' model " +\
    "probeert zo veel mogelijk van de punten beschikbaar in het QDAMEdit format te vinden. Probeer eerst het 'dijk' model aangezien hier de consistentste resultaten uit komen."

iface = gr.Interface(
    fn=annotate_file,
    title="Dijkprofiel Annotator",
    description=description,
    inputs=[gr.inputs.File(file_count="multiple", type="file", label="te annoteren surfacelines files", optional=False), gr.inputs.Dropdown(['dijk', 'dijk+sloot', "volledig"], type="value", default=None, label='Model type')],
    outputs=gr.outputs.File(label="gegenereerde file"))
iface.launch()
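The handler above is a thin wrapper around dijkprofile_annotator.annotate (defined in annotator.py below); note that os.get_terminal_size() can raise OSError when stdout is not attached to a terminal, which is worth keeping in mind for hosted environments. A minimal sketch of calling the annotator directly, without the Gradio UI; the input path here is hypothetical:

import dijkprofile_annotator

# Annotate one QDAMEdit-format surfacelines file; "my_surfacelines.csv"
# is an illustrative path, not part of the repository.
dijkprofile_annotator.annotate(
    "my_surfacelines.csv",        # input surfacelines file
    "characteristicpoints.csv",   # output annotation file
    device='cpu')                 # same device choice app.py makes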
dijkprofile-annotator/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 Het Waterschapshuis / Kenniscentrum / Tooling

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
dijkprofile-annotator/conftest.py
ADDED
@@ -0,0 +1,10 @@
from pathlib import Path
import pytest


# This is used to get code coverage working correctly when running unit tests
def pytest_collection_modifyitems(items):
    no_cov = pytest.mark.no_cover
    for item in items:
        if "integration" in Path(item.fspath).parts:
            item.add_marker(no_cov)
dijkprofile-annotator/dijkprofile_annotator/__init__.py
ADDED
@@ -0,0 +1,13 @@
from . import models
from . import dataset
from . import training
from . import utils
from . import config
from . import preprocessing
from .utils import visualize_sample
from .utils import visualize_prediction
from .utils import visualize_files
from .utils import visualize_dict
from .annotator import annotate
from .annotator import make_predictions
from .annotator import write_predictions_

dijkprofile-annotator/dijkprofile_annotator/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (637 Bytes).

dijkprofile-annotator/dijkprofile_annotator/__pycache__/annotator.cpython-39.pyc
ADDED
Binary file (2.74 kB).

dijkprofile-annotator/dijkprofile_annotator/__pycache__/config.cpython-39.pyc
ADDED
Binary file (4.18 kB).
dijkprofile-annotator/dijkprofile_annotator/annotator.py
ADDED
@@ -0,0 +1,98 @@
import csv
import os

import numpy as np
import torch

import dijkprofile_annotator.config as config
import dijkprofile_annotator.utils as utils
import dijkprofile_annotator.preprocessing as preprocessing
from dijkprofile_annotator.models import Dijknet


def annotate(surfacelines_filepath, outputfile, class_list='simple', max_profile_length=512, custom_model_path=None, custom_scaler_path=None, device=None):
    surfacelines_dict = preprocessing.read_surfaceline_file(surfacelines_filepath)
    profile_dict = preprocessing.make_height_profiles(surfacelines_dict, max_profile_length)

    dir = os.path.dirname(__file__)

    if device:
        device = device
    else:
        # setup model
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    class_dict, _, _ = utils.get_class_dict(class_list)
    model = Dijknet(1, len(class_dict))

    if custom_model_path:
        model.load_state_dict(torch.load(custom_model_path, map_location=device))
    else:
        model.load_state_dict(torch.load(os.path.join(dir, config.MODEL_PATH), map_location=device))
    model.eval()

    # copy network to device
    model = model.to(device)

    predictions = make_predictions(model, profile_dict, max_profile_length, device)

    write_predictions_(predictions, profile_dict, surfacelines_dict, outputfile, class_list)


def make_predictions(model, profile_dict, max_profile_length, device):
    accumulator = np.zeros((len(profile_dict), max_profile_length))
    for i, key in enumerate(profile_dict.keys()):
        accumulator[i] = profile_dict[key]['profile'][:max_profile_length]

    accumulator = accumulator.reshape(accumulator.shape[0], 1, max_profile_length)

    outputs = model(torch.tensor(accumulator).to(device).float())
    flat_output = torch.argmax(outputs, dim=1).cpu()
    predictions = flat_output.numpy()
    return predictions


def write_predictions_(predictions, profile_dict, surfacelines_dict, output_filepath, class_list):
    class_dict, inverse_class_dict, class_weights = utils.get_class_dict(class_list)

    with open(output_filepath, 'w') as csvFile:
        writer = csv.writer(csvFile, delimiter=';', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(config.HEADER)
        for i, key in enumerate(profile_dict.keys()):
            # get predictions
            profile_pred = predictions[i]

            # construct dict with key for each row
            row_dict = {key: -1 for key in config.HEADER}
            row_dict["LOCATIONID"] = key

            # loop through predictions and fill the entries
            used_classes = []
            prev_class_n = 999  # key that's not in the inverse_class_dict
            for index, class_n in enumerate(profile_pred):
                if class_n == 0 or class_n in used_classes:
                    continue
                if class_n != prev_class_n:
                    # get class name
                    class_name = inverse_class_dict[class_n]

                    # if this index is different from the last, this is the characteristicpoint
                    used_classes.append(prev_class_n)

                    # set prev_class to the new class
                    prev_class_n = class_n

                    # construct the csv row with the new class
                    if index >= len(surfacelines_dict[key]):
                        continue

                    (x, y, z) = surfacelines_dict[key][index]
                    row_dict["X_" + class_name] = round(x, 3)
                    row_dict["Y_" + class_name] = round(y, 3)
                    row_dict["Z_" + class_name] = round(z, 3)

            # write the row to the csv file
            row = []
            for columnname in config.HEADER:
                row.append(row_dict[columnname])
            writer.writerow(row)
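The deduplication in write_predictions_ is the subtle part: in effect, each class contributes only the index where its run of predictions starts, and class 0 ('leeg') is never written. A tiny illustration with a hypothetical prediction array:

# In effect, write_predictions_ keeps the first index of each class run;
# that index is then used to look up the (x, y, z) surface point.
profile_pred = [0, 0, 1, 1, 2, 2, 2, 3, 3, 0]

first_index = {}
for index, class_n in enumerate(profile_pred):
    if class_n != 0 and class_n not in first_index:
        first_index[class_n] = index
print(first_index)  # {1: 2, 2: 4, 3: 7}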
dijkprofile-annotator/dijkprofile_annotator/config.py
ADDED
@@ -0,0 +1,169 @@
import os

CHARPOINT_CONVERSION_DICT = {
    "": "leeg",
    "101_Q19_2": "buitenkruin",
    "101_Q19_3": "binnenkruin",
    "101_Q19_5": "binnenteen",
    "105_T09_11": "insteek_sloot",
    "811_T13_8": "leeg",
    "351_T03_10": "leeg",
    "_T01_KKW": "leeg",
    "108_Q06_250": "leeg",
    "303_Q05_1": "leeg",
    "353__11": "leeg",
    "_T00_17": "leeg",
    "109_Q08_13": "leeg",
    "_Q07_KDM": "leeg",
    "_Q07_KDW": "leeg",
    '0': "leeg",
    None: "leeg",
    'nan': "leeg"
}

CLASS_DICT_REGIONAL = {
    "leeg": 0,
    "startpunt": 1,
    "buitenkruin": 2,
    "binnenkruin": 3,
    "binnenteen": 4,
    "insteek_sloot": 5
}

WEIGHT_DICT_REGIONAL = [0.1, 1.0, 1.1, 1.0, 0.1]

CLASS_DICT_FULL = {
    'leeg': 0,
    'Maaiveld binnenwaarts': 1,
    'Insteek sloot polderzijde': 2,
    'Slootbodem polderzijde': 3,
    'Slootbodem dijkzijde': 4,
    'Insteek sloot dijkzijde': 5,
    'Teen dijk binnenwaarts': 6,
    'Kruin binnenberm': 7,
    'Insteek binnenberm': 8,
    'Kruin binnentalud': 9,
    'Verkeersbelasting kant binnenwaarts': 9,  # 10
    'Verkeersbelasting kant buitenwaarts': 10,
    'Kruin buitentalud': 10,  # 12
    'Insteek buitenberm': 11,
    'Kruin buitenberm': 12,
    'Teen dijk buitenwaarts': 13,
    'Insteek geul': 14,
    'Teen geul': 15,
    'Maaiveld buitenwaarts': 16,
}

# TODO: write this out explicitly
WEIGHT_DICT_FULL = [1.0] * 17

CLASS_DICT_SIMPLE = {
    'leeg': 0,
    'Maaiveld buitenwaarts': 1,
    'Teen dijk buitenwaarts': 2,
    'Kruin buitentalud': 3,
    'Kruin binnentalud': 4,
    'Teen dijk binnenwaarts': 5,
}

WEIGHT_DICT_SIMPLE = [0.1, 0.5, 0.7, 1.0, 1.0, 0.5]

CLASS_DICT_SIMPLE_SLOOT = {
    'leeg': 0,
    'Maaiveld buitenwaarts': 1,
    'Teen dijk buitenwaarts': 2,
    'Kruin buitentalud': 3,
    'Kruin binnentalud': 4,
    'Teen dijk binnenwaarts': 5,
    'Insteek sloot dijkzijde': 6,
    'Insteek sloot polderzijde': 7,
    'Slootbodem polderzijde': 8,
    'Slootbodem dijkzijde': 9,
}

WEIGHT_DICT_SIMPLE_SLOOT = [0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.1]

CLASS_DICT_SIMPLE_BERM = {
    'leeg': 0,
    'Maaiveld buitenwaarts': 1,
    'Teen dijk buitenwaarts': 2,
    'Kruin buitentalud': 3,
    'Kruin binnentalud': 4,
    'Teen dijk binnenwaarts': 5,
    'Insteek sloot dijkzijde': 6,
    'Insteek sloot polderzijde': 7,
    'Slootbodem polderzijde': 8,
    'Slootbodem dijkzijde': 9,
    'Kruin binnenberm': 10,
    'Insteek binnenberm': 11,
}
WEIGHT_DICT_SIMPLE_BERM = [0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.1]

HEADER = ["LOCATIONID",
          "X_Maaiveld binnenwaarts",
          "Y_Maaiveld binnenwaarts",
          "Z_Maaiveld binnenwaarts",
          "X_Insteek sloot polderzijde",
          "Y_Insteek sloot polderzijde",
          "Z_Insteek sloot polderzijde",
          "X_Slootbodem polderzijde",
          "Y_Slootbodem polderzijde",
          "Z_Slootbodem polderzijde",
          "X_Slootbodem dijkzijde",
          "Y_Slootbodem dijkzijde",
          "Z_Slootbodem dijkzijde",
          "X_Insteek sloot dijkzijde",
          "Y_Insteek sloot dijkzijde",
          "Z_Insteek sloot dijkzijde",
          "X_Teen dijk binnenwaarts",
          "Y_Teen dijk binnenwaarts",
          "Z_Teen dijk binnenwaarts",
          "X_Kruin binnenberm",
          "Y_Kruin binnenberm",
          "Z_Kruin binnenberm",
          "X_Insteek binnenberm",
          "Y_Insteek binnenberm",
          "Z_Insteek binnenberm",
          "X_Kruin binnentalud",
          "Y_Kruin binnentalud",
          "Z_Kruin binnentalud",
          "X_Verkeersbelasting kant binnenwaarts",
          "Y_Verkeersbelasting kant binnenwaarts",
          "Z_Verkeersbelasting kant binnenwaarts",
          "X_Verkeersbelasting kant buitenwaarts",
          "Y_Verkeersbelasting kant buitenwaarts",
          "Z_Verkeersbelasting kant buitenwaarts",
          "X_Kruin buitentalud",
          "Y_Kruin buitentalud",
          "Z_Kruin buitentalud",
          "X_Insteek buitenberm",
          "Y_Insteek buitenberm",
          "Z_Insteek buitenberm",
          "X_Kruin buitenberm",
          "Y_Kruin buitenberm",
          "Z_Kruin buitenberm",
          "X_Teen dijk buitenwaarts",
          "Y_Teen dijk buitenwaarts",
          "Z_Teen dijk buitenwaarts",
          "X_Insteek geul",
          "Y_Insteek geul",
          "Z_Insteek geul",
          "X_Teen geul",
          "Y_Teen geul",
          "Z_Teen geul",
          "X_Maaiveld buitenwaarts",
          "Y_Maaiveld buitenwaarts",
          "Z_Maaiveld buitenwaarts"]

SCALER_PATH = os.path.join("data", "trained_models", "scaler.pik")
MODEL_PATH = os.path.join('data', 'trained_models', 'dijknet_simple_95.pt')

INVERSE_CLASS_DICT_FULL = {v: k for k, v in CLASS_DICT_FULL.items()}
INVERSE_CLASS_DICT_SIMPLE = {v: k for k, v in CLASS_DICT_SIMPLE.items()}
INVERSE_CLASS_DICT_SIMPLE_BERM = {v: k for k, v in CLASS_DICT_SIMPLE_BERM.items()}
INVERSE_CLASS_DICT_SIMPLE_SLOOT = {v: k for k, v in CLASS_DICT_SIMPLE_SLOOT.items()}
INVERSE_CLASS_DICT_REGIONAL = {v: k for k, v in CLASS_DICT_REGIONAL.items()}

# manual mappings to get the correct names for plotting later
if 11 in INVERSE_CLASS_DICT_FULL:
    INVERSE_CLASS_DICT_FULL[10] = 'Kruin buitentalud'
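The CLASS_DICT_* / INVERSE_CLASS_DICT_* pairs give the round trip between point names and network class indices; a quick sketch using the 'simple' mapping:

from dijkprofile_annotator import config

# name -> class index -> name, using the 'simple' labelset
idx = config.CLASS_DICT_SIMPLE['Kruin buitentalud']   # 3
name = config.INVERSE_CLASS_DICT_SIMPLE[idx]          # 'Kruin buitentalud'
print(idx, name)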
dijkprofile-annotator/dijkprofile_annotator/data/trained_models/dijknet_simple_95.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:97f131655b81f0fb8f03fb7162f42a1b9baae1bd0fda064c761a0af35b4858e0
size 106467655
dijkprofile-annotator/dijkprofile_annotator/data/trained_models/scaler.pik
ADDED
Binary file (420 Bytes).

dijkprofile-annotator/dijkprofile_annotator/dataset/__init__.py
ADDED
@@ -0,0 +1 @@
from .dataset import DijkprofileDataset

dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (267 Bytes).

dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/dataset.cpython-39.pyc
ADDED
Binary file (1.81 kB).
dijkprofile-annotator/dijkprofile_annotator/dataset/dataset.py
ADDED
@@ -0,0 +1,42 @@
import numpy as np
import torch.utils.data as data


class DijkprofileDataset(data.Dataset):
    """Pytorch custom dataset class to use with the pytorch dataloader."""

    def __init__(self, profile_dict, partition, custom_scaler_path=None):
        """Dijkprofile Dataset, provides profiles and labels to a pytorch model.

        Args:
            profile_dict (dict): dict containing the profiles and labels
            partition (list): list used to split the dataset into train and test
                sets. The list contains the ids to use for this dataset, in the
                format returned by sklearn.model_selection.train_test_split
        """
        self.data_dict = profile_dict
        self.list_IDs = partition

        print("scaler in dataset class is deprecated and moved to preprocessing")
        # load scaler
        # if custom_scaler_path:
        #     self.scaler = joblib.load(custom_scaler_path)
        # else:
        #     self.scaler = joblib.load(os.path.join(dir, config.SCALER_PATH))
        # # rescale all profiles
        # for key in profile_dict.keys():
        #     profile_dict[key]['profile'] = self.scaler.transform(
        #         profile_dict[key]['profile'].reshape(-1, 1)).reshape(-1)
        #     profile_dict[key]['profile'] = profile_dict[key]['profile'] / 10

    def __len__(self):
        return len(self.list_IDs)

    def __getitem__(self, index):
        id = self.list_IDs[index]
        X = self.data_dict[id]['profile'].reshape(1, -1).astype(np.float32)
        y = self.data_dict[id]['label'].reshape(1, -1)
        return X, y

    def __str__(self):
        return "<Dijkprofile dataset: datapoints={}>".format(len(self.list_IDs))
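DijkprofileDataset plugs straight into a pytorch DataLoader; a sketch assuming a pair of annotated CSV files (the paths are hypothetical):

import dijkprofile_annotator.preprocessing as preprocessing
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from dijkprofile_annotator.dataset import DijkprofileDataset

# profile_dict maps location id -> {'profile': ..., 'label': ...}
lines = preprocessing.read_surfaceline_file("surfacelines.csv")
points = preprocessing.read_charpoints_file("characteristicpoints.csv")
profile_dict = preprocessing.make_labeled_height_profiles(lines, points, 512)

train_ids, test_ids = train_test_split(list(profile_dict.keys()), test_size=0.2)
loader = DataLoader(DijkprofileDataset(profile_dict, train_ids),
                    batch_size=4, shuffle=True)
X, y = next(iter(loader))  # X: (4, 1, 512) float32, y: (4, 1, 512)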
dijkprofile-annotator/dijkprofile_annotator/models/__init__.py
ADDED
@@ -0,0 +1 @@
from .dijknet import Dijknet

dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (255 Bytes).

dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/dijknet.cpython-39.pyc
ADDED
Binary file (3.86 kB).
dijkprofile-annotator/dijkprofile_annotator/models/dijknet.py
ADDED
@@ -0,0 +1,109 @@
import torch
import torch.nn as nn
from dijkprofile_annotator.utils import extract_img


class Double_conv(nn.Module):
    '''(Conv1d => ReLU) * 3 => Dropout'''

    def __init__(self, in_ch, out_ch, p):
        """
        Args:
            in_ch (int): input channels
            out_ch (int): output channels
            p (float): dropout chance
        """
        super(Double_conv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(in_ch, out_ch, 3, padding=1, stride=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(out_ch, out_ch, 5, padding=2, stride=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(out_ch, out_ch, 7, padding=3, stride=1),
            nn.ReLU(inplace=True),
            nn.Dropout(p=p)
        )

    def forward(self, x):
        x = self.conv(x)
        return x


class Conv_down(nn.Module):
    '''Double_conv => MaxPool1d'''

    def __init__(self, in_ch, out_ch, p):
        """
        Args:
            in_ch (int): input channels
            out_ch (int): output channels
            p (float): dropout chance
        """
        super(Conv_down, self).__init__()
        self.conv = Double_conv(in_ch, out_ch, p)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)

    def forward(self, x):
        x = self.conv(x)
        pool_x = self.pool(x)
        return pool_x, x


class Conv_up(nn.Module):
    '''ConvTranspose1d => concat skip connection => Double_conv'''

    def __init__(self, in_ch, out_ch, p):
        """
        Args:
            in_ch (int): input channels
            out_ch (int): output channels
            p (float): dropout chance
        """
        super(Conv_up, self).__init__()
        self.up = nn.ConvTranspose1d(in_ch, out_ch, kernel_size=2, stride=2)
        self.conv = Double_conv(in_ch, out_ch, p)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        x1_dim = x1.size()[2]
        x2 = extract_img(x1_dim, x2)
        x1 = torch.cat((x1, x2), dim=1)
        x1 = self.conv(x1)
        return x1


class Dijknet(nn.Module):
    """Dijknet convolutional neural network. 1D Unet variant."""

    def __init__(self, in_channels, out_channels, p=0.25):
        """Dijknet convolutional neural network, 1D Unet variant. The model is probably a bit too big
        for what it needs to do, but it seems to work just fine.

        Args:
            in_channels (int): number of input channels, should be 1
            out_channels (int): number of output channels/classes
            p (float, optional): dropout chance for the dropout layers. Defaults to 0.25.
        """
        super(Dijknet, self).__init__()
        self.Conv_down1 = Conv_down(in_channels, 64, p)
        self.Conv_down2 = Conv_down(64, 128, p)
        self.Conv_down3 = Conv_down(128, 256, p)
        self.Conv_down4 = Conv_down(256, 512, p)
        self.Conv_down5 = Conv_down(512, 1024, p)
        self.Conv_up1 = Conv_up(1024, 512, p)
        self.Conv_up2 = Conv_up(512, 256, p)
        self.Conv_up3 = Conv_up(256, 128, p)
        self.Conv_up4 = Conv_up(128, 64, p)
        self.Conv_up5 = Conv_up(128, 64, p)
        self.Conv_out = nn.Conv1d(64, out_channels, 1, padding=0, stride=1)
        self.Conv_final = nn.Conv1d(out_channels, out_channels, 1, padding=0, stride=1)

    def forward(self, x):
        x, conv1 = self.Conv_down1(x)
        x, conv2 = self.Conv_down2(x)
        x, conv3 = self.Conv_down3(x)
        x, conv4 = self.Conv_down4(x)
        _, x = self.Conv_down5(x)
        x = self.Conv_up1(x, conv4)
        x = self.Conv_up2(x, conv3)
        x = self.Conv_up3(x, conv2)
        x = self.Conv_up4(x, conv1)
        # final upscale to true size
        x = self.Conv_out(x)
        x = self.Conv_final(x)
        return x
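A quick shape check of the network on a dummy batch. The input length should be divisible by 16, since the encoder halves the sequence four times before the decoder upsamples it back:

import torch
from dijkprofile_annotator.models import Dijknet

net = Dijknet(1, 6)                # 1 input channel, 6 classes ('simple' list)
x = torch.randn(8, 1, 512)         # batch of 8 height profiles of length 512
out = net(x)
print(out.shape)                   # torch.Size([8, 6, 512])
preds = torch.argmax(out, dim=1)   # per-position class predictions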
dijkprofile-annotator/dijkprofile_annotator/preprocessing/__init__.py
ADDED
@@ -0,0 +1,8 @@
from .preprocessing import filepath_pair_to_labeled_sample
from .preprocessing import file_pairs_to_tensor_profiles
from .preprocessing import read_charpoints_file
from .preprocessing import read_surfaceline_file
from .preprocessing import make_height_profiles
from .preprocessing import make_labeled_height_profiles
from .preprocessing import get_file_pairs_from_dir
from .preprocessing import load_datasets

dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (571 Bytes).

dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/link_collector.cpython-39.pyc
ADDED
Binary file (1.02 kB).

dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/preprocessing.cpython-39.pyc
ADDED
Binary file (11.2 kB).
dijkprofile-annotator/dijkprofile_annotator/preprocessing/preprocessing.py
ADDED
@@ -0,0 +1,355 @@
import csv
import os
from operator import itemgetter

import numpy as np
from dijkprofile_annotator.config import (CLASS_DICT_FULL, CLASS_DICT_REGIONAL,
                                          CLASS_DICT_SIMPLE,
                                          CLASS_DICT_SIMPLE_BERM,
                                          CLASS_DICT_SIMPLE_SLOOT)
from dijkprofile_annotator.dataset import DijkprofileDataset
from sklearn.model_selection import train_test_split


def read_surfaceline_file(surfaceline_fp):
    """Read surfaceline file and convert to dict.

    Args:
        surfaceline_fp (string): path to the surfacelines file.

    Returns:
        dict: dict containing list of points per location.
    """
    # read the coordinates and collect to surfaceline_dict
    surfacelines = {}
    with open(surfaceline_fp) as csvfile:
        surfacereader = csv.reader(csvfile, delimiter=';', quotechar='|')
        next(surfacereader)  # skip header
        stop_exec = False
        for row in surfacereader:
            if stop_exec:
                break
            location = row[0]
            surfacelines[location] = []
            for i in range(1, len(row)-2, 3):
                # some files have empty points
                if row[i] == '' or row[i+1] == '' or row[i+2] == '':
                    continue
                try:
                    x = _parse_coordinate(row[i].replace('"', ''))
                    y = _parse_coordinate(row[i+1].replace('"', ''))
                    z = _parse_coordinate(row[i+2].replace('"', ''))
                    surfacelines[location].append((x, y, z))
                except ValueError as e:
                    print(f"error reading point from surfaceline at location: {location} (index: {i}), error: {e}")
                    stop_exec = True
                    break
    return surfacelines


def read_charpoints_file(charlines_fp):
    """Read characteristicpoints file and convert to dict.

    Args:
        charlines_fp (string): path to characteristicpoints file.

    Returns:
        dict: dict containing list of points per location.
    """
    charpoints = {}
    with open(charlines_fp) as csvfile:
        cpointsreader = csv.reader(csvfile, delimiter=';', quotechar='|')
        header = next(cpointsreader)
        stop_exec = False
        for idx, row in enumerate(cpointsreader):
            if stop_exec:
                break
            try:
                location = row[0]
            except IndexError:
                print(f"couldn't read location in row: {row} at {idx}, file: {charlines_fp}")
                continue
            point_dict = {}
            for i in range(1, len(row)-2, 3):
                if row[i] == '' or row[i+1] == '' or row[i+2] == '':
                    continue
                try:
                    x = _parse_coordinate(row[i].replace('"', ''))
                    y = _parse_coordinate(row[i+1].replace('"', ''))
                    z = _parse_coordinate(row[i+2].replace('"', ''))

                    point_dict[header[i][2:]] = (x, y, z)
                except ValueError as e:
                    print(
                        f"error reading point from characteristicpoints at location: {location} (index: {i}), error: {e}")
                    stop_exec = True

            charpoints[location] = point_dict
    return charpoints


def _parse_coordinate(coord):
    """Convert string point coordinate to float, removing extra dots if needed.
    Some of the coordinates contain multiple dots, probably because someone
    opened the file in Excel and it reformatted the numbers. In all examples
    I've seen, the first dot only indicates thousands and can safely be removed.

    Args:
        coord (str): string representation of the number to parse

    Returns:
        float: float representation of the coordinate
    """
    try:
        return float(coord)
    except ValueError:
        parts = coord.split(".")
        return float("".join(parts[:-1]) + "." + parts[-1])


def make_height_profiles(surfaceline_dict, max_profile_size):
    """Make height arrays from surfacelines dict.

    Args:
        surfaceline_dict (dict): dict of surfacelines by location.
        max_profile_size (int): fixed max size for the height profile.

    Returns:
        dict: dict containing height profiles by location.
    """
    profile_dict = {}
    for location in surfaceline_dict.keys():
        heights = np.array(surfaceline_dict[location])[:, 2].astype(np.float32)

        # we'll fit the whole profile in a fixed length so that multiple profiles can be used as samples
        z_tmp = np.zeros(max_profile_size)
        profile_length = heights.shape[0]
        if profile_length < max_profile_size:
            z_tmp[:profile_length] = np.array(heights, dtype=np.float32)[:profile_length]
            z_tmp[profile_length:] = heights[profile_length-1]
            heights = z_tmp
        else:
            heights = heights[:max_profile_size]
        profile_dict[location] = {"profile": heights}
    return profile_dict


def make_labeled_height_profiles(surfaceline_dict, cpoints_dict, max_profile_size, class_list='simple', require_all_points=True):
    """Make height profiles and labels from surfacelines and cpoints.

    Args:
        surfaceline_dict (dict): dict of surfacelines by location.
        cpoints_dict (dict): dict of characteristic points by location.
        max_profile_size (int): fixed max size for the height profile.
        class_list (str): selection of classes to use, see config.
        require_all_points (bool): filter out profiles that do not contain all the points in the class_list.

    Returns:
        dict: dict containing height profiles and their labels by location.
    """
    profile_label_dict = {}

    class_list = class_list.lower()
    class_dict = {}
    if class_list == 'regional':
        class_dict = CLASS_DICT_REGIONAL
    elif class_list == 'simple':
        class_dict = CLASS_DICT_SIMPLE
    elif class_list == 'berm':
        class_dict = CLASS_DICT_SIMPLE_BERM
    elif class_list == 'sloot':
        class_dict = CLASS_DICT_SIMPLE_SLOOT
    elif class_list == 'full':
        class_dict = CLASS_DICT_FULL
    else:
        raise NotImplementedError(f"No class list available of type: {class_list}")

    required_point_types = list(class_dict.keys())
    required_point_types.remove('leeg')  # we don't require the empty class

    for location in surfaceline_dict.keys():
        heights = np.array(surfaceline_dict[location])[:, 2].astype(np.float32)
        labels = np.zeros(len(heights))

        # if no labels were given for this location, skip it
        if not location in cpoints_dict.keys():
            # print(f"location not in cpoints dict, {location}")
            continue

        # skip the location if the required points are not all present
        if require_all_points:
            labeled_point_types = [key for key, value in cpoints_dict[location].items() if value != (-1.0, -1.0, -1.0)]
            if not all([point_type in labeled_point_types for point_type in required_point_types]):
                # print(f"not all point types present, missing {set(required_point_types) - set(labeled_point_types)}")
                continue

        for i, (key, point) in enumerate(cpoints_dict[location].items()):
            # if the point is not empty, find the nearest point in the surface file;
            # problems with rounding errors require matching by distance per point
            if point == (-1.0, -1.0, -1.0):
                continue

            distances = []
            for idx, surfacepoint in enumerate(surfaceline_dict[location]):
                dist = np.linalg.norm(np.array(surfacepoint)-np.array(point))
                distances.append((idx, dist))
            (idx, dist) = sorted(distances, key=itemgetter(1))[0]
            if key in class_dict:
                labels[idx] = class_dict[key]

        # forward fill the labels
        for i in range(1, len(labels)):
            if labels[i] == 0.0:
                labels[i] = labels[i-1]

        # we'll fit the whole profile in a fixed length so that multiple profiles can be used as samples
        z_tmp = np.zeros(max_profile_size)
        labels_tmp = np.zeros(max_profile_size)
        profile_length = labels.shape[0]
        if profile_length < max_profile_size:
            z_tmp[:profile_length] = np.array(heights, dtype=np.float32)[:profile_length]
            labels_tmp[:profile_length] = np.array(labels)[:profile_length]
            z_tmp[profile_length:] = heights[profile_length-1]
            labels_tmp[profile_length:] = labels[profile_length-1]
            heights = z_tmp
            labels = labels_tmp
        else:
            heights = heights[:max_profile_size]
            labels = labels[:max_profile_size]

        # rescale every profile to between -1 and 1
        # scaler = MinMaxScaler(feature_range=(-1, 1))
        # heights = scaler.fit_transform(heights.reshape(-1, 1))

        profile_label_dict[location] = {}
        profile_label_dict[location]['profile'] = heights.astype(np.float32)
        profile_label_dict[location]['label'] = labels.astype(np.int32)
    return profile_label_dict


def filepath_pair_to_labeled_sample(source_surfacelines, source_characteristicpoints, max_profile_size=352, class_list='simple', require_all_points=True):
    """Convert a pair of surfacelines and characteristicpoints filepaths to a format suited for machine learning.

    Args:
        source_surfacelines (string): path to the surfacelines file.
        source_characteristicpoints (string): path to the characteristicpoints file.
        max_profile_size (int, optional): max size for the profile. Defaults to 352.
        class_list (str, optional): point labelset to use, see config. Defaults to 'simple'.
        require_all_points (bool, optional): drop profiles missing any point in the class_list. Defaults to True.

    Returns:
        dict: dict containing height profile and labels by location.
    """
    surfaceline_dict = read_surfaceline_file(source_surfacelines)
    cpoints_dict = read_charpoints_file(source_characteristicpoints)

    profile_label_dict = make_labeled_height_profiles(
        surfaceline_dict,
        cpoints_dict,
        max_profile_size,
        class_list=class_list,
        require_all_points=require_all_points)
    return profile_label_dict


def file_pairs_to_tensor_profiles(filepair_list, max_profile_size=352, class_list='simple', require_all_points=True):
    """Convert a list of pairs of surfacelines and characteristicpoints files to a format suited for machine learning.

    Args:
        filepair_list (list): list of tuples containing the paths to the surfacelines and characteristicpoints files.
        max_profile_size (int, optional): max size for the profile. Defaults to 352.
        class_list (str, optional): point labelset to use, see config. Defaults to 'simple'.
        require_all_points (bool, optional): drop profiles missing any point in the class_list. Defaults to True.

    Returns:
        dict: Dict containing all the height profiles and labels by location.
    """
    all_profiles = {}
    for source_surfacelines, source_characteristicpoints in filepair_list:
        profile_label_dict = filepath_pair_to_labeled_sample(
            source_surfacelines,
            source_characteristicpoints,
            max_profile_size,
            class_list,
            require_all_points=require_all_points)
        for key, value in profile_label_dict.items():
            all_profiles[key] = value
    return all_profiles


def get_file_pairs_from_dir(path, krp_format=False):
    """Recursively get all pairs of lines and points files in a directory.

    Args:
        path (str): path to the root directory containing the lines and points csv files,
            directory is searched recursively for pairs.
        krp_format (bool): indicates that the folder contains csv files in the naming convention used by
            waterschap Vallei en Veluwe.

    Returns:
        list: list of tuples where the first item is the path to the surfacelines.csv and the second
            the path to the characteristicpoints.csv
    """
    if krp_format:
        return _get_file_pairs_from_dir_krp(path)
    list_of_files = []
    for (dirpath, _, filenames) in os.walk(path):
        for filename in filenames:
            if filename.endswith('lines.csv'):
                if os.path.exists(os.sep.join([dirpath, filename])) and \
                        os.path.exists(os.sep.join([dirpath, 'characteristicpoints.csv'])):
                    list_of_files.append((
                        os.sep.join([dirpath, filename]),
                        os.sep.join([dirpath, 'characteristicpoints.csv'])))
    return list_of_files


def _get_file_pairs_from_dir_krp(path):
    """Recursively get all pairs of lines and points files in a directory, but in the format used
    by Waterschap Vallei en Veluwe; same functionality as get_file_pairs_from_dir.

    Args:
        path (str): path to the root directory containing the lines and points csv files,
            directory is searched recursively for pairs

    Returns:
        list: list of tuples where the first item is the path to the surfacelines.csv and the second
            the path to the characteristicpoints.csv
    """
    list_of_files = []
    for (dirpath, _, filenames) in os.walk(path):
        for filename in filenames:
            if filename.endswith('.krp.csv'):
                if os.path.exists(os.sep.join([dirpath, filename])) and \
                        os.path.exists(os.sep.join([dirpath, filename.split(".krp")[0] + ".csv"])):
                    list_of_files.append((
                        os.sep.join([dirpath, filename.split(".krp")[0] + ".csv"]),
                        os.sep.join([dirpath, filename])))
    return list_of_files


def load_datasets(annotation_tuples, custom_scaler_path=None, test_size=0.2, max_profile_size=512, class_list='simple', require_all_points=True):
    """Load datasets given a list of annotation tuples.

    Args:
        annotation_tuples ([(str, str)]): list of tuples of filepaths to the lines and points files.
        custom_scaler_path (str, optional): path to a custom scaler to rescale the data. Defaults to None.
        test_size (float, optional): test size for the training. Defaults to 0.2.
        max_profile_size (int, optional): max profile size. Defaults to 512.
        class_list (str, optional): class_mapping/class_list to use. Defaults to 'simple'.
        require_all_points (bool, optional): whether to drop profiles that don't contain all points in the mapping. Defaults to True.

    Returns:
        DijkprofileDataset, DijkprofileDataset: train and test dataset classes
    """
    profile_dict = file_pairs_to_tensor_profiles(annotation_tuples, max_profile_size=max_profile_size, class_list=class_list, require_all_points=require_all_points)

    # construct dataloaders
    id_list = list(profile_dict.keys())
    [train, test] = train_test_split(id_list, shuffle=True, test_size=test_size)

    dataset_train = DijkprofileDataset(profile_dict, train, custom_scaler_path=custom_scaler_path)
    dataset_validation = DijkprofileDataset(profile_dict, test, custom_scaler_path=custom_scaler_path)

    return dataset_train, dataset_validation
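End to end, the functions above chain into a small dataset pipeline; a sketch assuming a directory of paired lines/points CSV files (the path is hypothetical):

import dijkprofile_annotator.preprocessing as preprocessing

# Collect (surfacelines, characteristicpoints) pairs and build datasets.
pairs = preprocessing.get_file_pairs_from_dir("data/annotations")
train_ds, val_ds = preprocessing.load_datasets(pairs,
                                               max_profile_size=512,
                                               class_list='simple')
print(len(train_ds), len(val_ds))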
dijkprofile-annotator/dijkprofile_annotator/requirements.txt
ADDED
@@ -0,0 +1,7 @@
joblib==1.1.0
matplotlib==3.5.0
numpy==1.21.4
Pillow==8.4.0
scikit_learn==1.0.1
seaborn==0.11.2
torch==1.10.0
dijkprofile-annotator/dijkprofile_annotator/training/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .train import train
from .train import get_loss_train
from .train import accuracy_check
from .train import accuracy_check_for_batch

dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (367 Bytes).

dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/train.cpython-39.pyc
ADDED
Binary file (5.97 kB).
dijkprofile-annotator/dijkprofile_annotator/training/train.py
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import dijkprofile_annotator.preprocessing as preprocessing
|
2 |
+
import dijkprofile_annotator.utils as utils
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
import torch.nn as nn
|
6 |
+
from dijkprofile_annotator.models import Dijknet
|
7 |
+
from PIL import Image
|
8 |
+
from torch.utils.data import DataLoader
|
9 |
+
from tqdm import tqdm
|
10 |
+
|
11 |
+
|
12 |
+
def get_loss_train(model, data_train, criterion):
|
13 |
+
"""generate loss over train set.
|
14 |
+
|
15 |
+
Args:
|
16 |
+
model (): model to use for prediction
|
17 |
+
data_train (torch.utils.data.DataLoader)): Dataloader containing the profiles
|
18 |
+
and labels
|
19 |
+
criterion (pytorch loss function, probably nn.CrossEntropyLoss): loss function to be used.
|
20 |
+
|
21 |
+
Returns:
|
22 |
+
float: total accuracy
|
23 |
+
float: total loss
|
24 |
+
"""
|
25 |
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
26 |
+
model.eval()
|
27 |
+
total_acc = 0
|
28 |
+
total_loss = 0
|
29 |
+
for batch, (profile, masks) in enumerate(data_train):
|
30 |
+
with torch.no_grad():
|
31 |
+
profile = torch.Tensor(profile).to(device)
|
32 |
+
masks = torch.Tensor(masks).to(device)
|
33 |
+
outputs = model(profile)
|
34 |
+
loss = criterion(outputs, masks)
|
35 |
+
preds = torch.argmax(outputs, dim=1).float()
|
36 |
+
acc = accuracy_check_for_batch(masks.cpu(), preds.cpu(), profile.size()[0])
|
37 |
+
total_acc = total_acc + acc
|
38 |
+
total_loss = total_loss + loss.cpu().item()
|
39 |
+
return total_acc/(batch+1), total_loss/(batch + 1)
|
40 |
+
|
41 |
+
|
42 |
+
def accuracy_check(mask, prediction):
|
43 |
+
"""check accuracy of prediciton.
|
44 |
+
|
45 |
+
Args:
|
46 |
+
mask (torch.Tensor, PIL Image or str): labels
|
47 |
+
prediction (torch.Tensor, PIL Image or str): predictions
|
48 |
+
|
49 |
+
Returns:
|
50 |
+
float: accuracy of prediction given mask.
|
51 |
+
"""
|
52 |
+
ims = [mask, prediction]
|
53 |
+
np_ims = []
|
54 |
+
for item in ims:
|
55 |
+
if 'str' in str(type(item)):
|
56 |
+
item = np.array(Image.open(item))
|
57 |
+
elif 'PIL' in str(type(item)):
|
58 |
+
item = np.array(item)
|
59 |
+
elif 'torch' in str(type(item)):
|
60 |
+
item = item.numpy()
|
61 |
+
np_ims.append(item)
|
62 |
+
|
63 |
+
compare = np.equal(np_ims[0], np_ims[1])
|
64 |
+
accuracy = np.sum(compare)
|
65 |
+
|
66 |
+
return accuracy/len(np_ims[0].flatten())
|
67 |
+
|
68 |
+
|
69 |
+
def accuracy_check_for_batch(masks, predictions, batch_size):
|
70 |
+
"""check accuracy of prediciton given mask.
|
71 |
+
|
72 |
+
Args:
|
73 |
+
masks (torch.Tensor): labels
|
74 |
+
predictions (torch.Tensor): predictions
|
75 |
+
batch_size (int): batch size of prediciton/mask.
|
76 |
+
|
77 |
+
Returns:
|
78 |
+
float: accuracy of prediction given mask.
|
79 |
+
"""
|
80 |
+
total_acc = 0
|
81 |
+
for index in range(batch_size):
|
82 |
+
total_acc += accuracy_check(masks[index], predictions[index])
|
83 |
+
return total_acc/batch_size
|
84 |
+
|
85 |
+
|
86 |
+
def train(annotation_tuples,
|
87 |
+
epochs=100,
|
88 |
+
batch_size_train=32,
|
89 |
+
batch_size_val=512,
|
90 |
+
num_workers=6,
|
91 |
+
custom_scaler_path=None,
|
92 |
+
class_list='simple',
|
93 |
+
test_size=0.2,
|
94 |
+
max_profile_size=512,
|
95 |
+
shuffle=True):
|
96 |
+
"""[summary]
|
97 |
+
|
98 |
+
Args:
|
99 |
+
annotation_tuples ([type]): [description]
|
100 |
+
epochs (int, optional): [description]. Defaults to 100.
|
101 |
+
batch_size_train (int, optional): [description]. Defaults to 32.
|
102 |
+
        batch_size_val (int, optional): batch size used for validation. Defaults to 512.
        num_workers (int, optional): number of DataLoader worker processes. Defaults to 6.
        custom_scaler_path (str, optional): path to a pre-fitted scaler to use instead of fitting a new one. Defaults to None.
        class_list (str, optional): class mapping to use. Defaults to 'simple'.
        test_size (float, optional): fraction of the samples used for validation. Defaults to 0.2.
        max_profile_size (int, optional): cutoff size of the profiles. Defaults to 512.
        shuffle (bool, optional): whether to shuffle the training data. Defaults to True.

    Raises:
        NotImplementedError: when the given class_list is not implemented

    Returns:
        Dijknet: trained Dijknet model.
    """
    print("loading datasets")
    train_dataset, test_dataset = preprocessing.load_datasets(annotation_tuples,
                                                              custom_scaler_path=custom_scaler_path,
                                                              test_size=test_size,
                                                              max_profile_size=max_profile_size)
    print("loaded datasets:")
    print(f"  train: {len(train_dataset)} samples")
    print(f"  test: {len(test_dataset)} samples")

    class_dict, _, class_weights = utils.get_class_dict(class_list)

    print(f"constructing model with {len(class_dict)} output classes")
    model = Dijknet(1, len(class_dict))

    # DataLoader parameters
    train_params = {'batch_size': batch_size_train,
                    'shuffle': shuffle,
                    'num_workers': num_workers}

    params_val = {'batch_size': batch_size_val,
                  'shuffle': False,
                  'num_workers': num_workers}

    training_generator = DataLoader(train_dataset, **train_params)
    validation_generator = DataLoader(test_dataset, **params_val)

    # CUDA for PyTorch
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # loss, weighted per class to compensate for class imbalance
    criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor(class_weights).to(device))

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    print("starting training.")
    # loop over epochs
    for epoch in range(epochs):
        print("epoch: {}".format(epoch))
        # training
        loss_list = []
        model.train()
        for local_batch, local_labels in tqdm(training_generator):
            # the DataLoader returns a smaller final batch when it runs out of
            # samples; skip it because the reshape below assumes a full batch
            if not local_labels.shape[0] == train_params['batch_size']:
                continue

            # transfer to GPU
            local_batch, local_labels = local_batch.to(device), local_labels.to(device).long()

            # model computations
            outputs = model(local_batch)
            local_labels = local_labels.reshape(train_params['batch_size'], -1)

            loss = criterion(outputs, local_labels)
            optimizer.zero_grad()
            loss.backward()

            # update weights
            optimizer.step()
            loss_list.append(loss.detach().cpu().numpy())

        # report average loss over the epoch
        print("training loss: ", np.mean(loss_list))

        # validation
        model.eval()
        batch_accuracies = []
        batch_accuracies_iso = []
        batch_loss_val = []
        for local_batch, local_labels in validation_generator:
            # get new batches
            local_batch, local_labels = local_batch.to(device), local_labels.to(device).long()

            # model computations
            outputs = model(local_batch)

            # calculate loss
            loss = criterion(outputs, local_labels.reshape(local_labels.shape[0], -1))
            batch_loss_val.append(loss.detach().cpu().numpy())

            # post-process the raw predictions into left-to-right increasing classes
            outputs_iso = utils.force_sequential_predictions(outputs, method='isotonic')

            # compute accuracy of the raw predictions over the validation set
            flat_output = torch.argmax(outputs, dim=1).cpu().reshape(local_batch.shape[0], 1, -1)
            compare = flat_output == local_labels.cpu()
            acc = np.sum(compare.numpy(), axis=2) / int(local_batch.shape[-1])
            batch_accuracies.append(np.mean(acc, axis=0)[0])

            # compute accuracy of the isotonic-regression predictions
            flat_output = torch.argmax(outputs_iso, dim=1).cpu().reshape(local_batch.shape[0], 1, -1)
            compare = flat_output == local_labels.cpu()
            acc = np.sum(compare.numpy(), axis=2) / int(local_batch.shape[-1])
            batch_accuracies_iso.append(np.mean(acc, axis=0)[0])

        print("validation accuracy: {}".format(np.mean(batch_accuracies)))
        print("validation accuracy isotonic regression: {}".format(np.mean(batch_accuracies_iso)))
        print("validation loss: {}".format(np.mean(batch_loss_val)))
        print("=" * 50)

    return model
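A minimal usage sketch of the train function above. The import path and the epochs keyword are assumptions based on this commit's package layout, and the annotation file paths are placeholders:

# sketch only; the import path, the epochs argument, and the data paths
# are assumptions, not part of this commit
from dijkprofile_annotator.training import train

annotation_tuples = [
    ("data/surfacelines_1.csv", "data/characteristicpoints_1.csv"),
    ("data/surfacelines_2.csv", "data/characteristicpoints_2.csv"),
]
model = train(annotation_tuples, epochs=25, class_list='simple')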
dijkprofile-annotator/dijkprofile_annotator/utils/__init__.py
ADDED
@@ -0,0 +1,9 @@
from .utils import extract_img
from .utils import ffill
from .utils import visualize_sample
from .utils import visualize_prediction
from .utils import visualize_files
from .utils import visualize_dict
from .utils import train_scaler
from .utils import get_class_dict
from .utils import force_sequential_predictions
dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (528 Bytes).
dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (10.3 kB).
dijkprofile-annotator/dijkprofile_annotator/utils/utils.py
ADDED
@@ -0,0 +1,350 @@
import random
from collections import defaultdict

import dijkprofile_annotator.preprocessing as preprocessing
import dijkprofile_annotator.config as config
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import torch.nn.functional as F
from sklearn.isotonic import IsotonicRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def extract_img(size, in_tensor):
    """Cut out the center `size` elements of a tensor along its last dimension.

    Args:
        size (int): size of cut
        in_tensor (tensor): tensor to be cut
    """
    dim1 = in_tensor.size()[2]
    in_tensor = in_tensor[:, :, int((dim1-size)/2):int((size + (dim1-size)/2))]
    return in_tensor


def ffill(arr):
    """Forward fill utility function.

    Args:
        arr (np.array): numpy array to fill

    Returns:
        np.array: filled array.
    """
    mask = np.isnan(arr)
    idx = np.where(~mask, np.arange(mask.shape[1]), 0)
    np.maximum.accumulate(idx, axis=1, out=idx)
    out = arr[np.arange(idx.shape[0])[:, None], idx]
    return out


def train_scaler(profile_dict, scaler_type='minmax'):
    """Train a scaler given a profile dict.

    Args:
        profile_dict (dict): dict containing the profile heights and labels
        scaler_type (str, optional): type of scaler to fit, 'minmax' or 'standard'. Defaults to 'minmax'.

    Returns:
        sklearn MinMaxScaler or StandardScaler: fitted scaler in sklearn format
    """
    if scaler_type == 'minmax':
        scaler = MinMaxScaler(feature_range=(-1, 1))  # for neural networks -1,1 is better than 0,1
    elif scaler_type == 'standard':
        scaler = StandardScaler()
    else:
        raise NotImplementedError(f"no scaler: {scaler_type}")
    randkey = random.choice(list(profile_dict.keys()))
    accumulator = np.zeros((len(profile_dict), profile_dict[randkey]['profile'].shape[0]))

    for i, key in enumerate(profile_dict.keys()):
        accumulator[i, :] = profile_dict[key]['profile']

    scaler.fit(accumulator.reshape(-1, 1))
    return scaler


def get_class_dict(class_list):
    """Get the correct class dicts and weights from config.

    Args:
        class_list (string): string representing the class mappings to use

    Raises:
        NotImplementedError: raised if a not implemented class mapping is passed

    Returns:
        (dict, dict, list): dict with class mappings, inverse of that dict, weights for each class.
    """
    class_list = class_list.lower()
    if class_list == 'regional':
        class_dict = config.CLASS_DICT_REGIONAL
        inverse_class_dict = config.INVERSE_CLASS_DICT_REGIONAL
        class_weights = config.WEIGHT_DICT_REGIONAL
    elif class_list == 'simple':
        class_dict = config.CLASS_DICT_SIMPLE
        class_weights = config.WEIGHT_DICT_SIMPLE
        inverse_class_dict = config.INVERSE_CLASS_DICT_SIMPLE
    elif class_list == 'berm':
        class_dict = config.CLASS_DICT_SIMPLE_BERM
        class_weights = config.WEIGHT_DICT_SIMPLE_BERM
        inverse_class_dict = config.INVERSE_CLASS_DICT_SIMPLE_BERM
    elif class_list == 'sloot':
        class_dict = config.CLASS_DICT_SIMPLE_SLOOT
        class_weights = config.WEIGHT_DICT_SIMPLE_SLOOT
        inverse_class_dict = config.INVERSE_CLASS_DICT_SIMPLE_SLOOT
    elif class_list == 'full':
        class_dict = config.CLASS_DICT_FULL
        class_weights = config.WEIGHT_DICT_FULL
        inverse_class_dict = config.INVERSE_CLASS_DICT_FULL
    else:
        raise NotImplementedError(f"No configs found for class list of type: {class_list}")
    return class_dict, inverse_class_dict, class_weights


def force_sequential_predictions(predictions, method='isotonic'):
    """Force the classes in the sample to always go up from left to right. This
    makes sense because a higher class can never lie to the left of a lower class
    in the representation chosen here. Two methods are available, isotonic
    regression and a group-first method; isotonic regression is recommended.

    Args:
        predictions (torch.Tensor): tensor output of the model in shape (batch_size, channel_size, sample_size)
        method (str, optional): method to use for enforcing the sequentiality. Defaults to 'isotonic'.

    Raises:
        NotImplementedError: if the given method is not implemented

    Returns:
        torch.Tensor: tensor in the same shape as the input, but with only increasing classes from left to right.
    """
    predictions = predictions.detach().cpu()
    n_classes = predictions.shape[1]  # 1 is the channel dimension
    if method == 'first':
        # loop over batch
        for j in range(predictions.shape[0]):
            pred = torch.argmax(predictions[j], dim=0)

            # construct dict of groups of start-end indices per class
            groups = defaultdict(list)
            current_class = pred[0]
            group_start_idx = 0
            for i in range(1, len(pred)):
                if pred[i] != current_class:
                    groups[current_class.item()].append((group_start_idx, i))
                    group_start_idx = i
                    current_class = pred[i]
            # close the final group, which the loop above never appends
            groups[current_class.item()].append((group_start_idx, len(pred)))

            # if the class occurs again later in the profile,
            # discard that later occurrence of it
            new_pred = torch.zeros(len(pred))
            last_index = 0
            for class_n, group_tuples in sorted(groups.items()):
                for group_tuple in group_tuples:
                    if group_tuple[0] >= last_index:
                        new_pred[group_tuple[0]:group_tuple[1]] = class_n
                        last_index = group_tuple[1]
                        break

            # simple forward fill
            for i in range(1, len(new_pred)):
                if new_pred[i] == 0:
                    new_pred[i] = new_pred[i-1]

            # encode back to one-hot tensor
            predictions[j] = F.one_hot(new_pred.to(torch.int64), num_classes=n_classes).permute(1, 0)
    elif method == 'isotonic':
        for i in range(predictions.shape[0]):
            pred = torch.argmax(predictions[i], dim=0)

            x = np.arange(0, len(pred))
            iso_reg = IsotonicRegression().fit(x, pred)
            new_pred = iso_reg.predict(x)
            new_pred = np.round(new_pred)

            # encode back to one-hot tensor
            new_pred = F.one_hot(torch.Tensor(new_pred).to(torch.int64), num_classes=n_classes).permute(1, 0)
            predictions[i] = new_pred
    else:
        raise NotImplementedError(f"Unknown method: {method}")

    return predictions


def visualize_prediction(heights, prediction, labels, location_name, class_list):
    """Visualize a profile plus its labels and prediction.

    Args:
        heights (tensor): tensor containing the heights data of the profile
        prediction (tensor): tensor containing the predicted data of the profile
        labels (tensor): tensor containing the labels for each height point in heights
        location_name (str): name of the profile, just for visualization
        class_list (str): class mapping to use, determines which labels are visualized
    """
    class_dict, inverse_class_dict, _ = get_class_dict(class_list)
    fig, ax = plt.subplots(figsize=(20, 11))
    plt.title(location_name)
    plt.plot(heights, label='profile')

    # change one-hot batched format to a list of classes
    if prediction.dim() == 3:
        prediction = torch.argmax(torch.squeeze(prediction, dim=0), dim=0)
    if prediction.dim() == 2:
        # assuming channel-first representation
        prediction = torch.argmax(prediction, dim=0)
    prediction = prediction.detach().cpu().numpy()

    label_height = np.min(heights)
    n_labels = len(np.unique(labels))
    label_height_distance = (np.max(heights) - np.min(heights)) / (n_labels * 2)

    cmap = sns.color_palette("Set2", len(set(class_dict.values())))

    # plot actual labels
    prev_class_n = 999
    for index, class_n in enumerate(labels):
        if class_n == 0:
            continue
        if class_n != prev_class_n:
            plt.axvline(index, 0, 5, color=cmap[class_n], linestyle=(0, (5, 10)))  # loose dashes
            plt.text(index, label_height, inverse_class_dict[class_n], rotation=0)
            label_height += label_height_distance
        prev_class_n = class_n

    # plot predicted points
    used_classes = []
    prev_class_n = 999
    for index, class_n in enumerate(prediction):
        if class_n == 0 or class_n in used_classes:
            continue
        if class_n != prev_class_n:
            plt.axvline(index, 0, 5, color=cmap[class_n], linestyle=(0, (1, 1)))  # small dots
            plt.text(index, label_height, "predicted " + inverse_class_dict[class_n], rotation=0)
            label_height += label_height_distance
            used_classes.append(prev_class_n)
        prev_class_n = class_n

    plt.show()


def visualize_sample(heights, labels, location_name, class_list):
    """Visualize a profile and its labels.

    Args:
        heights (tensor): tensor containing the heights data of the profile
        labels (tensor): tensor containing the labels for each height point in heights
        location_name (str): name of the profile, just for visualization
        class_list (str): class mapping to use, determines which labels are visualized
    """
    class_dict, inverse_class_dict, _ = get_class_dict(class_list)
    fig, ax = plt.subplots(figsize=(20, 11))
    plt.title(location_name)
    plt.plot(heights, label='profile')

    label_height = np.min(heights)
    n_labels = len(np.unique(labels))
    label_height_distance = (np.max(heights) - np.min(heights)) / (n_labels * 2)

    cmap = sns.color_palette("Set2", len(set(class_dict.values())))

    # plot actual labels
    prev_class_n = 999
    for index, class_n in enumerate(labels):
        if class_n == 0:
            continue
        if class_n != prev_class_n:
            plt.axvline(index, 0, 5, color=cmap[class_n], linestyle=(0, (5, 10)))  # loose dashes
            plt.text(index, label_height, inverse_class_dict[class_n], rotation=0)
            label_height += label_height_distance
        prev_class_n = class_n

    plt.show()


def visualize_files(linesfp, pointsfp, max_profile_size=512, class_list='simple', location_index=0, return_dict=False):
    """Visualize a profile from surfacelines and points filepaths.

    Args:
        linesfp (str): path to surfacelines file.
        pointsfp (str): path to points file.
        max_profile_size (int, optional): cutoff size of the profile, can be left at the default here. Defaults to 512.
        class_list (str, optional): class mapping to use. Defaults to 'simple'.
        location_index (int, optional): index of the profile to visualize. Defaults to 0.
        return_dict (bool, optional): return the profile dict for faster re-visualization. Defaults to False.

    Returns:
        dict, optional: profile dict containing the profiles of the given files
    """
    profile_label_dict = preprocessing.filepath_pair_to_labeled_sample(linesfp,
                                                                       pointsfp,
                                                                       max_profile_size=max_profile_size,
                                                                       class_list=class_list)

    location_name = list(profile_label_dict.keys())[location_index]
    heights = profile_label_dict[location_name]['profile']
    labels = profile_label_dict[location_name]['label']

    class_dict, inverse_class_dict, _ = get_class_dict(class_list)
    fig, ax = plt.subplots(figsize=(20, 11))
    plt.title(location_name)
    plt.plot(heights, label='profile')

    label_height = np.min(heights)
    n_labels = len(np.unique(labels))
    label_height_distance = (np.max(heights) - np.min(heights)) / n_labels

    cmap = sns.color_palette("Set2", len(set(class_dict.values())))

    # plot actual labels
    prev_class_n = 999
    for index, class_n in enumerate(labels):
        if class_n == 0:
            continue
        if class_n != prev_class_n:
            plt.axvline(index, 0, 5, color=cmap[class_n], linestyle=(0, (5, 10)))  # loose dashes
            plt.text(index, label_height, inverse_class_dict[class_n], rotation=0)
            label_height += label_height_distance
        prev_class_n = class_n

    plt.show()

    if return_dict:
        return profile_label_dict


def visualize_dict(profile_label_dict, class_list='simple', location_index=0):
    """Visualize a profile with labels from a profile_dict, profile specified by index.

    Args:
        profile_label_dict (dict): dict containing profiles and labels
        class_list (str, optional): class mapping to use for visualization. Defaults to 'simple'.
        location_index (int, optional): specifies the index of the profile to visualize. Defaults to 0.
    """
    location_name = list(profile_label_dict.keys())[location_index]
    heights = profile_label_dict[location_name]['profile']
    labels = profile_label_dict[location_name]['label']

    class_dict, inverse_class_dict, _ = get_class_dict(class_list)
    fig, ax = plt.subplots(figsize=(20, 11))
    plt.title(location_name)
    plt.plot(heights, label='profile')

    label_height = np.min(heights)
    n_labels = len(np.unique(labels))
    label_height_distance = (np.max(heights) - np.min(heights)) / n_labels

    cmap = sns.color_palette("Set2", len(set(class_dict.values())))

    # plot actual labels
    prev_class_n = 999
    for index, class_n in enumerate(labels):
        if class_n == 0:
            continue
        if class_n != prev_class_n:
            plt.axvline(index, 0, 5, color=cmap[class_n], linestyle=(0, (5, 10)))  # loose dashes
            plt.text(index, label_height, inverse_class_dict[class_n], rotation=0)
            label_height += label_height_distance
        prev_class_n = class_n

    plt.show()
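To illustrate force_sequential_predictions, a self-contained sketch on random stand-in logits; the tensor shape follows the docstring and is not data shipped with this commit:

# sketch: enforce non-decreasing classes on fake model output
import torch
from dijkprofile_annotator.utils import force_sequential_predictions

fake_logits = torch.randn(2, 6, 512)  # (batch_size, channel_size, sample_size)
cleaned = force_sequential_predictions(fake_logits, method='isotonic')

pred = torch.argmax(cleaned[0], dim=0)
assert bool((pred[1:] >= pred[:-1]).all())  # classes never decrease left to right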
dijkprofile-annotator/dijkprofile_annotator/web/.gitkeep
ADDED
File without changes
dijkprofile-annotator/dijkprofile_annotator/web/app.py
ADDED
@@ -0,0 +1,45 @@
import math
import shutil
import gradio as gr
import dijkprofile_annotator
from zipfile import ZipFile

def annotate_file(file_objects, model_type):
    # TODO: actually use different model types based on the selected model, only a well trained dijk model is available now.
    generated_charfiles = []
    str1 = "Starting processing of files."
    # shutil.get_terminal_size() falls back to 80x24 when there is no terminal;
    # os.get_terminal_size() would raise an OSError in a hosted environment
    term_width = shutil.get_terminal_size().columns
    pad1 = math.floor((term_width - len(str1)) / 2) * "="
    print(pad1 + str1 + pad1)
    for i, file_obj in enumerate(file_objects):
        target_filepath = f"/tmp/characteristicpoints_{i}.csv"
        print(f"  Processing file '{file_obj.name}', model '{model_type}', saving to '{target_filepath}'")
        dijkprofile_annotator.annotate(file_obj.name, target_filepath, device='cpu')
        generated_charfiles.append(target_filepath)
        print(f"  finished processing: {file_obj.name}! saved to: {target_filepath}")
        print("  ", "-" * (term_width - 5))

    print("finished with all processing!")
    # return the csv file if only 1 file was given, return a zip otherwise.
    if len(generated_charfiles) == 1:
        print(f"returning file: {generated_charfiles[0]}")
        return generated_charfiles[0]
    else:
        return_zipfile = "/tmp/characteristic_points.zip"
        with ZipFile(return_zipfile, 'w') as zipObj:
            for filepath in generated_charfiles:
                zipObj.write(filepath)
        print(f"returning file: {return_zipfile}")
        return return_zipfile

description = "Upload a surfacelines.csv file in QDAMEdit format and get an annotation file in characteristicpoints format back.\n" +\
    "A neural network based on image segmentation, trained on ~6000 annotated profiles, places the points on the profiles as well as possible.\n" +\
    "Several models are available to generate the annotation: the 'dijk' model only tries to find the dike itself, the 'dijk+sloot' model also looks for a ditch, and the 'volledig' model " +\
    "tries to find as many of the points available in the QDAMEdit format as possible. Try the 'dijk' model first, as it gives the most consistent results."

iface = gr.Interface(
    fn=annotate_file,
    title="Dijkprofiel Annotator",
    description=description,
    inputs=[gr.inputs.File(file_count="multiple", type="file", label="surfacelines files to annotate", optional=False), gr.inputs.Dropdown(['dijk', 'dijk+sloot', "volledig"], type="value", default=None, label='Model type')],
    outputs=gr.outputs.File(label="generated file"))
iface.launch()
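The Gradio handler above is a thin wrapper around dijkprofile_annotator.annotate, which can also be called directly; a sketch with placeholder paths:

# sketch: calling the annotator without the web UI; both paths are placeholders
import dijkprofile_annotator

dijkprofile_annotator.annotate("data/surfacelines.csv",           # QDAMEdit surfacelines input
                               "/tmp/characteristicpoints.csv",   # output characteristicpoints csv
                               device='cpu')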
dijkprofile-annotator/output/models/.gitkeep
ADDED
File without changes
dijkprofile-annotator/output/models/scaler.pik
ADDED
Binary file (611 Bytes).
dijkprofile-annotator/output/reports/.gitkeep
ADDED
File without changes
dijkprofile-annotator/output/visualizations/.gitkeep
ADDED
File without changes
dijkprofile-annotator/run/configs/.gitkeep
ADDED
File without changes
dijkprofile-annotator/setup.cfg
ADDED
@@ -0,0 +1,12 @@
[pycodestyle]
max-line-length = 120
exclude = .ipynb_checkpoints

[pep8]
max-line-length = 120
exclude = .ipynb_checkpoints

[flake8]
max-line-length = 120
exclude = .ipynb_checkpoints
max-complexity = 10
dijkprofile-annotator/setup.py
ADDED
@@ -0,0 +1,21 @@
from setuptools import setup, find_packages

setup(name='dijkprofile_annotator',
      version='0.1.0',
      description='Automatically annotate dijkprofiles in qDAMEdit format',
      long_description=open('README.md').read(),
      url='',
      author='Jonathan Gerbscheid',
      author_email='j.gerbscheid@hetwaterschapshuis.nl',
      license='MIT',
      package_dir={"dijkprofile_annotator": "dijkprofile_annotator"},
      packages=find_packages(),
      zip_safe=False,
      install_requires=["joblib",
                        "matplotlib",
                        "numpy",
                        "pillow",
                        "scikit_learn>=1.0.1",
                        "seaborn",
                        "torch>=1.9.0"]
      )
requirements.txt
ADDED
@@ -0,0 +1,8 @@
joblib==1.1.0
matplotlib==3.5.0
numpy==1.21.4
Pillow==8.4.0
scikit_learn==1.0.1
seaborn==0.11.2
torch==1.10.0
./dijkprofile-annotator