jgerbscheid committed on
Commit
b9bac12
1 Parent(s): bcb2589

initial commit

Files changed (44)
  1. README.md +4 -32
  2. app.py +45 -0
  3. dijkprofile-annotator/LICENSE +21 -0
  4. dijkprofile-annotator/conftest.py +10 -0
  5. dijkprofile-annotator/dijkprofile_annotator/__init__.py +13 -0
  6. dijkprofile-annotator/dijkprofile_annotator/__pycache__/__init__.cpython-39.pyc +0 -0
  7. dijkprofile-annotator/dijkprofile_annotator/__pycache__/annotator.cpython-39.pyc +0 -0
  8. dijkprofile-annotator/dijkprofile_annotator/__pycache__/config.cpython-39.pyc +0 -0
  9. dijkprofile-annotator/dijkprofile_annotator/annotator.py +98 -0
  10. dijkprofile-annotator/dijkprofile_annotator/config.py +169 -0
  11. dijkprofile-annotator/dijkprofile_annotator/data/trained_models/dijknet_simple_95.pt +3 -0
  12. dijkprofile-annotator/dijkprofile_annotator/data/trained_models/scaler.pik +0 -0
  13. dijkprofile-annotator/dijkprofile_annotator/dataset/__init__.py +1 -0
  14. dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/__init__.cpython-39.pyc +0 -0
  15. dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/dataset.cpython-39.pyc +0 -0
  16. dijkprofile-annotator/dijkprofile_annotator/dataset/dataset.py +42 -0
  17. dijkprofile-annotator/dijkprofile_annotator/models/__init__.py +1 -0
  18. dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/__init__.cpython-39.pyc +0 -0
  19. dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/dijknet.cpython-39.pyc +0 -0
  20. dijkprofile-annotator/dijkprofile_annotator/models/dijknet.py +109 -0
  21. dijkprofile-annotator/dijkprofile_annotator/preprocessing/__init__.py +8 -0
  22. dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/__init__.cpython-39.pyc +0 -0
  23. dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/link_collector.cpython-39.pyc +0 -0
  24. dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/preprocessing.cpython-39.pyc +0 -0
  25. dijkprofile-annotator/dijkprofile_annotator/preprocessing/preprocessing.py +355 -0
  26. dijkprofile-annotator/dijkprofile_annotator/requirements.txt +7 -0
  27. dijkprofile-annotator/dijkprofile_annotator/training/__init__.py +4 -0
  28. dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/__init__.cpython-39.pyc +0 -0
  29. dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/train.cpython-39.pyc +0 -0
  30. dijkprofile-annotator/dijkprofile_annotator/training/train.py +219 -0
  31. dijkprofile-annotator/dijkprofile_annotator/utils/__init__.py +9 -0
  32. dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  33. dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/utils.cpython-39.pyc +0 -0
  34. dijkprofile-annotator/dijkprofile_annotator/utils/utils.py +350 -0
  35. dijkprofile-annotator/dijkprofile_annotator/web/.gitkeep +0 -0
  36. dijkprofile-annotator/dijkprofile_annotator/web/app.py +45 -0
  37. dijkprofile-annotator/output/models/.gitkeep +0 -0
  38. dijkprofile-annotator/output/models/scaler.pik +0 -0
  39. dijkprofile-annotator/output/reports/.gitkeep +0 -0
  40. dijkprofile-annotator/output/visualizations/.gitkeep +0 -0
  41. dijkprofile-annotator/run/configs/.gitkeep +0 -0
  42. dijkprofile-annotator/setup.cfg +12 -0
  43. dijkprofile-annotator/setup.py +21 -0
  44. requirements.txt +8 -0
README.md CHANGED
@@ -1,37 +1,9 @@
  ---
- title: Dpa Example
- emoji: 🐨
- colorFrom: pink
- colorTo: yellow
+ title: dijkprofile-annotator
+ emoji: 💦
+ colorFrom: blue
+ colorTo: indigo
  sdk: gradio
  app_file: app.py
  pinned: false
  ---
-
- # Configuration
-
- `title`: _string_
- Display title for the Space
-
- `emoji`: _string_
- Space emoji (emoji-only character allowed)
-
- `colorFrom`: _string_
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
-
- `colorTo`: _string_
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
-
- `sdk`: _string_
- Can be either `gradio` or `streamlit`
-
- `sdk_version` : _string_
- Only applicable for `streamlit` SDK.
- See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
-
- `app_file`: _string_
- Path to your main application file (which contains either `gradio` or `streamlit` Python code).
- Path is relative to the root of the repository.
-
- `pinned`: _boolean_
- Whether the Space stays on top of your list.
app.py ADDED
@@ -0,0 +1,45 @@
+ import os
+ import math
+ import gradio as gr
+ import dijkprofile_annotator
+ from zipfile import ZipFile
+
+ def annotate_file(file_objects, model_type):
+     # TODO: actually use different model types based on selected model, only a well trained dijk model is available now.
+     generated_charfiles = []
+     str1 = "Starting processing of files."
+     pad1 = math.floor((os.get_terminal_size().columns - len(str1)) / 2) * "="
+     print(pad1 + "Starting processing of files." + pad1)
+     for i, file_obj in enumerate(file_objects):
+         target_filepath = f"/tmp/characteristicpoints_{i}.csv"
+         print(f" Processing file '{file_obj.name}', model '{model_type}', saving to '{target_filepath}'")
+         dijkprofile_annotator.annotate(file_obj.name, target_filepath, device='cpu')
+         generated_charfiles.append(target_filepath)
+         print(f" finished processing: {file_obj.name}! saved to : {target_filepath}")
+         print(" ", "-" * (os.get_terminal_size().columns - 5))
+
+     print("finished with all processing!")
+     # return the csv file if only 1 file was given, return a zip otherwise.
+     if len(generated_charfiles) == 1:
+         print(f"returning file: {generated_charfiles[0]}")
+         return generated_charfiles[0]
+     else:
+         return_zipfile = "/tmp/characterist_points.zip"
+         with ZipFile(return_zipfile, 'w') as zipObj:
+             for filepath in generated_charfiles:
+                 zipObj.write(filepath)
+         print(f"returning file: {return_zipfile}")
+         return return_zipfile
+
+ description = "Upload een surfacelines.csv bestand in QDAMEdit format en krijg een annotatie file in characteristicpoints format terug \n" +\
+     "Een neural netwerk gebaseerd op image segmentation heeft geleerd op basis van ~6000 geannoteerde profielen om zo goed mogelijk automatisch de punten te plaatsen op de profielen.\n" +\
+     "Er zijn meerdere modellen beschikbaar om de annotatie te genereren, het 'dijk' model probeert alleen de dijk te vinden, het 'dijk+sloot' model zoekt ook naar een sloot en het 'volledig' model " +\
+     "probeert zo veel mogelijk van de punten beschikbaar in het QDAMEdit format te vinden. Probeer eerst het 'dijk' model aangezien hier de consistentste resultaten uit komen."
+
+ iface = gr.Interface(
+     fn=annotate_file,
+     title="Dijkprofiel Annotator",
+     description=description,
+     inputs=[gr.inputs.File(file_count="multiple", type="file", label="te annoteren surfacelines files", optional=False), gr.inputs.Dropdown(['dijk', 'dijk+sloot', "volledig"], type="value", default=None, label='Model type')],
+     outputs=gr.outputs.File(label="gegenereerde file"))
+ iface.launch()
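
For reference, the Space above is a thin wrapper around a single library call; a minimal sketch of the same call without Gradio, assuming the package is installed and "surfacelines.csv" is a placeholder path to a QDAMEdit-format file:

    import dijkprofile_annotator

    # Annotate one surfacelines file and write a characteristicpoints CSV,
    # mirroring the device='cpu' choice used in app.py above.
    dijkprofile_annotator.annotate(
        "surfacelines.csv",              # placeholder input path
        "characteristicpoints_out.csv",  # placeholder output path
        device="cpu",
    )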
dijkprofile-annotator/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2021 Het Waterschapshuis / Kenniscentrum / Tooling
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
dijkprofile-annotator/conftest.py ADDED
@@ -0,0 +1,10 @@
+ from pathlib import Path
+ import pytest
+
+
+ # This is used to get code coverage working correctly when running unit tests
+ def pytest_collection_modifyitems(items):
+     no_cov = pytest.mark.no_cover
+     for item in items:
+         if "integration" in Path(item.fspath).parts:
+             item.add_marker(no_cov)
dijkprofile-annotator/dijkprofile_annotator/__init__.py ADDED
@@ -0,0 +1,13 @@
+ from . import models
+ from . import dataset
+ from . import training
+ from . import utils
+ from . import config
+ from . import preprocessing
+ from .utils import visualize_sample
+ from .utils import visualize_prediction
+ from .utils import visualize_files
+ from .utils import visualize_dict
+ from .annotator import annotate
+ from .annotator import make_predictions
+ from .annotator import write_predictions_
dijkprofile-annotator/dijkprofile_annotator/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (637 Bytes).
 
dijkprofile-annotator/dijkprofile_annotator/__pycache__/annotator.cpython-39.pyc ADDED
Binary file (2.74 kB).
 
dijkprofile-annotator/dijkprofile_annotator/__pycache__/config.cpython-39.pyc ADDED
Binary file (4.18 kB).
 
dijkprofile-annotator/dijkprofile_annotator/annotator.py ADDED
@@ -0,0 +1,98 @@
+ import csv
+ import os
+
+ import numpy as np
+ import torch
+
+ import dijkprofile_annotator.config as config
+ import dijkprofile_annotator.utils as utils
+ import dijkprofile_annotator.preprocessing as preprocessing
+ from dijkprofile_annotator.models import Dijknet
+
+
+ def annotate(surfacelines_filepath, outputfile, class_list='simple', max_profile_length=512, custom_model_path=None, custom_scaler_path=None, device=None):
+     surfacelines_dict = preprocessing.read_surfaceline_file(surfacelines_filepath)
+     profile_dict = preprocessing.make_height_profiles(surfacelines_dict, max_profile_length)
+
+     dir = os.path.dirname(__file__)
+
+     if device:
+         device = device
+     else:
+         # pick a device automatically
+         device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+     class_dict, _, _ = utils.get_class_dict(class_list)
+     model = Dijknet(1, len(class_dict))
+
+     if custom_model_path:
+         model.load_state_dict(torch.load(custom_model_path, map_location=device))
+     else:
+         model.load_state_dict(torch.load(os.path.join(dir, config.MODEL_PATH), map_location=device))
+     model.eval()
+
+     # copy network to device
+     model = model.to(device)
+
+     predictions = make_predictions(model, profile_dict, max_profile_length, device)
+
+     write_predictions_(predictions, profile_dict, surfacelines_dict, outputfile, class_list)
+
+
+ def make_predictions(model, profile_dict, max_profile_length, device):
+     accumulator = np.zeros((len(profile_dict), max_profile_length))
+     for i, key in enumerate(profile_dict.keys()):
+         accumulator[i] = profile_dict[key]['profile'][:max_profile_length]
+
+     accumulator = accumulator.reshape(accumulator.shape[0], 1, max_profile_length)
+
+     outputs = model(torch.tensor(accumulator).to(device).float())
+     flat_output = torch.argmax(outputs, dim=1).cpu()
+     predictions = flat_output.numpy()
+     return predictions
+
+
+ def write_predictions_(predictions, profile_dict, surfacelines_dict, output_filepath, class_list):
+     class_dict, inverse_class_dict, class_weights = utils.get_class_dict(class_list)
+
+     with open(output_filepath, 'w') as csvFile:
+         writer = csv.writer(csvFile, delimiter=';', quotechar='|', quoting=csv.QUOTE_MINIMAL)
+         writer.writerow(config.HEADER)
+         for i, key in enumerate(profile_dict.keys()):
+             # get predictions
+             profile_pred = predictions[i]
+
+             # construct dict with a key for each column in the row
+             row_dict = {key: -1 for key in config.HEADER}
+             row_dict["LOCATIONID"] = key
+
+             # loop through the predictions and fill in the entries
+             used_classes = []
+             prev_class_n = 999  # key that's not in the inverse_class_dict
+             for index, class_n in enumerate(profile_pred):
+                 if class_n == 0 or class_n in used_classes:
+                     continue
+                 if class_n != prev_class_n:
+                     # get class name
+                     class_name = inverse_class_dict[class_n]
+
+                     # if this index is different from the last, this is the characteristicpoint
+                     used_classes.append(prev_class_n)
+
+                     # set prev_class to the new class
+                     prev_class_n = class_n
+
+                     # construct the csv row with the new class
+                     if index >= len(surfacelines_dict[key]):
+                         continue
+
+                     (x, y, z) = surfacelines_dict[key][index]
+                     row_dict["X_" + class_name] = round(x, 3)
+                     row_dict["Y_" + class_name] = round(y, 3)
+                     row_dict["Z_" + class_name] = round(z, 3)
+
+             # write the row to the csv file
+             row = []
+             for columnname in config.HEADER:
+                 row.append(row_dict[columnname])
+             writer.writerow(row)
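
To make the flow of `annotate` above concrete, a rough step-by-step sketch of the same pipeline; the file paths are placeholders and 'simple' matches the bundled model:

    import torch
    import dijkprofile_annotator.preprocessing as preprocessing
    import dijkprofile_annotator.utils as utils
    from dijkprofile_annotator.annotator import make_predictions
    from dijkprofile_annotator.models import Dijknet

    max_profile_length = 512
    surfacelines = preprocessing.read_surfaceline_file("surfacelines.csv")         # placeholder path
    profiles = preprocessing.make_height_profiles(surfacelines, max_profile_length)

    class_dict, inverse_class_dict, _ = utils.get_class_dict('simple')
    model = Dijknet(1, len(class_dict))
    model.load_state_dict(torch.load("dijknet_simple_95.pt", map_location="cpu"))  # placeholder path
    model.eval()

    # one class id per profile point, shape (n_profiles, max_profile_length)
    predictions = make_predictions(model, profiles, max_profile_length, torch.device("cpu"))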
dijkprofile-annotator/dijkprofile_annotator/config.py ADDED
@@ -0,0 +1,169 @@
1
+ import os
2
+
3
+ CHARPOINT_CONVERSION_DICT = {
4
+ "": "leeg",
5
+ "101_Q19_2": "buitenkruin",
6
+ "101_Q19_3": "binnenkruin",
7
+ "101_Q19_5": "binnenteen",
8
+ "105_T09_11": "insteek_sloot",
9
+ "811_T13_8": "leeg",
10
+ "351_T03_10": "leeg",
11
+ "_T01_KKW": "leeg",
12
+ "108_Q06_250": "leeg",
13
+ "303_Q05_1": "leeg",
14
+ "353__11": "leeg",
15
+ "_T00_17": "leeg",
16
+ "109_Q08_13": "leeg",
17
+ "_Q07_KDM": "leeg",
18
+ "_Q07_KDW": "leeg",
19
+ '0': "leeg",
20
+ None: "leeg",
21
+ 'nan': "leeg"
22
+ }
23
+
24
+ CLASS_DICT_REGIONAL = {
25
+ "leeg": 0,
26
+ "startpunt": 1,
27
+ "buitenkruin": 2,
28
+ "binnenkruin": 3,
29
+ "binnenteen": 4,
30
+ "insteek_sloot": 5
31
+ }
32
+
33
+ WEIGHT_DICT_REGIONAL = [0.1, 1.0, 1.1, 1.0, 0.1]
34
+
35
+ CLASS_DICT_FULL = {
36
+ 'leeg': 0,
37
+ 'Maaiveld binnenwaarts': 1,
38
+ 'Insteek sloot polderzijde': 2,
39
+ 'Slootbodem polderzijde': 3,
40
+ 'Slootbodem dijkzijde': 4,
41
+ 'Insteek sloot dijkzijde': 5,
42
+ 'Teen dijk binnenwaarts': 6,
43
+ 'Kruin binnenberm': 7,
44
+ 'Insteek binnenberm': 8,
45
+ 'Kruin binnentalud': 9,
46
+ 'Verkeersbelasting kant binnenwaarts': 9, # 10
47
+ 'Verkeersbelasting kant buitenwaarts': 10,
48
+ 'Kruin buitentalud': 10, # 12
49
+ 'Insteek buitenberm': 11,
50
+ 'Kruin buitenberm': 12,
51
+ 'Teen dijk buitenwaarts': 13,
52
+ 'Insteek geul': 14,
53
+ 'Teen geul': 15,
54
+ 'Maaiveld buitenwaarts': 16,
55
+ }
56
+
57
+ # TODO: write this out explicitly
58
+ WEIGHT_DICT_FULL = [1.0] * 17
59
+
60
+ CLASS_DICT_SIMPLE = {
61
+ 'leeg': 0,
62
+ 'Maaiveld buitenwaarts': 1,
63
+ 'Teen dijk buitenwaarts': 2,
64
+ 'Kruin buitentalud': 3,
65
+ 'Kruin binnentalud': 4,
66
+ 'Teen dijk binnenwaarts': 5,
67
+ }
68
+
69
+ WEIGHT_DICT_SIMPLE = [0.1, 0.5, 0.7, 1.0, 1.0, 0.5]
70
+
71
+ CLASS_DICT_SIMPLE_SLOOT = {
72
+ 'leeg': 0,
73
+ 'Maaiveld buitenwaarts': 1,
74
+ 'Teen dijk buitenwaarts': 2,
75
+ 'Kruin buitentalud': 3,
76
+ 'Kruin binnentalud': 4,
77
+ 'Teen dijk binnenwaarts': 5,
78
+ 'Insteek sloot dijkzijde': 6,
79
+ 'Insteek sloot polderzijde': 7,
80
+ 'Slootbodem polderzijde': 8,
81
+ 'Slootbodem dijkzijde': 9,
82
+ }
83
+
84
+ WEIGHT_DICT_SIMPLE_SLOOT = [0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.1]
85
+
86
+ CLASS_DICT_SIMPLE_BERM = {
87
+ 'leeg': 0,
88
+ 'Maaiveld buitenwaarts': 1,
89
+ 'Teen dijk buitenwaarts': 2,
90
+ 'Kruin buitentalud': 3,
91
+ 'Kruin binnentalud': 4,
92
+ 'Teen dijk binnenwaarts': 5,
93
+ 'Insteek sloot dijkzijde': 6,
94
+ 'Insteek sloot polderzijde': 7,
95
+ 'Slootbodem polderzijde': 8,
96
+ 'Slootbodem dijkzijde': 9,
97
+ 'Kruin binnenberm': 10,
98
+ 'Insteek binnenberm': 11,
99
+ }
100
+ WEIGHT_DICT_SIMPLE_BERM = [0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.1]
101
+
102
+ HEADER = ["LOCATIONID",
103
+ "X_Maaiveld binnenwaarts",
104
+ "Y_Maaiveld binnenwaarts",
105
+ "Z_Maaiveld binnenwaarts",
106
+ "X_Insteek sloot polderzijde",
107
+ "Y_Insteek sloot polderzijde",
108
+ "Z_Insteek sloot polderzijde",
109
+ "X_Slootbodem polderzijde",
110
+ "Y_Slootbodem polderzijde",
111
+ "Z_Slootbodem polderzijde",
112
+ "X_Slootbodem dijkzijde",
113
+ "Y_Slootbodem dijkzijde",
114
+ "Z_Slootbodem dijkzijde",
115
+ "X_Insteek sloot dijkzijde",
116
+ "Y_Insteek sloot dijkzijde",
117
+ "Z_Insteek sloot dijkzijde",
118
+ "X_Teen dijk binnenwaarts",
119
+ "Y_Teen dijk binnenwaarts",
120
+ "Z_Teen dijk binnenwaarts",
121
+ "X_Kruin binnenberm",
122
+ "Y_Kruin binnenberm",
123
+ "Z_Kruin binnenberm",
124
+ "X_Insteek binnenberm",
125
+ "Y_Insteek binnenberm",
126
+ "Z_Insteek binnenberm",
127
+ "X_Kruin binnentalud",
128
+ "Y_Kruin binnentalud",
129
+ "Z_Kruin binnentalud",
130
+ "X_Verkeersbelasting kant binnenwaarts",
131
+ "Y_Verkeersbelasting kant binnenwaarts",
132
+ "Z_Verkeersbelasting kant binnenwaarts",
133
+ "X_Verkeersbelasting kant buitenwaarts",
134
+ "Y_Verkeersbelasting kant buitenwaarts",
135
+ "Z_Verkeersbelasting kant buitenwaarts",
136
+ "X_Kruin buitentalud",
137
+ "Y_Kruin buitentalud",
138
+ "Z_Kruin buitentalud",
139
+ "X_Insteek buitenberm",
140
+ "Y_Insteek buitenberm",
141
+ "Z_Insteek buitenberm",
142
+ "X_Kruin buitenberm",
143
+ "Y_Kruin buitenberm",
144
+ "Z_Kruin buitenberm",
145
+ "X_Teen dijk buitenwaarts",
146
+ "Y_Teen dijk buitenwaarts",
147
+ "Z_Teen dijk buitenwaarts",
148
+ "X_Insteek geul",
149
+ "Y_Insteek geul",
150
+ "Z_Insteek geul",
151
+ "X_Teen geul",
152
+ "Y_Teen geul",
153
+ "Z_Teen geul",
154
+ "X_Maaiveld buitenwaarts",
155
+ "Y_Maaiveld buitenwaarts",
156
+ "Z_Maaiveld buitenwaarts"]
157
+
158
+ SCALER_PATH = os.path.join("data", "trained_models", "scaler.pik")
159
+ MODEL_PATH = os.path.join('data', 'trained_models', 'dijknet_simple_95.pt')
160
+
161
+ INVERSE_CLASS_DICT_FULL = {v: k for k, v in CLASS_DICT_FULL.items()}
162
+ INVERSE_CLASS_DICT_SIMPLE = {v: k for k, v in CLASS_DICT_SIMPLE.items()}
163
+ INVERSE_CLASS_DICT_SIMPLE_BERM = {v: k for k, v in CLASS_DICT_SIMPLE_BERM.items()}
164
+ INVERSE_CLASS_DICT_SIMPLE_SLOOT = {v: k for k, v in CLASS_DICT_SIMPLE_SLOOT.items()}
165
+ INVERSE_CLASS_DICT_REGIONAL = {v: k for k, v in CLASS_DICT_REGIONAL.items()}
166
+
167
+ # manual mappings to get the correct names for plotting later
168
+ if 11 in INVERSE_CLASS_DICT_FULL:
169
+ INVERSE_CLASS_DICT_FULL[10] = 'Kruin buitentalud'
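
A small illustration of how these class mappings and their inverses are used together when turning per-point class ids back into point names (the ids below are made up):

    from dijkprofile_annotator.config import CLASS_DICT_SIMPLE, INVERSE_CLASS_DICT_SIMPLE

    toy_prediction = [0, 0, 1, 1, 2, 3, 4, 4, 5, 5]   # invented per-point class ids, not real model output
    point_names = [INVERSE_CLASS_DICT_SIMPLE[c] for c in toy_prediction]
    # point_names[4] == 'Teen dijk buitenwaarts', point_names[5] == 'Kruin buitentalud'
    assert CLASS_DICT_SIMPLE['Kruin binnentalud'] == 4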
dijkprofile-annotator/dijkprofile_annotator/data/trained_models/dijknet_simple_95.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97f131655b81f0fb8f03fb7162f42a1b9baae1bd0fda064c761a0af35b4858e0
+ size 106467655
dijkprofile-annotator/dijkprofile_annotator/data/trained_models/scaler.pik ADDED
Binary file (420 Bytes).
 
dijkprofile-annotator/dijkprofile_annotator/dataset/__init__.py ADDED
@@ -0,0 +1 @@
+ from .dataset import DijkprofileDataset
dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (267 Bytes).
 
dijkprofile-annotator/dijkprofile_annotator/dataset/__pycache__/dataset.cpython-39.pyc ADDED
Binary file (1.81 kB).
 
dijkprofile-annotator/dijkprofile_annotator/dataset/dataset.py ADDED
@@ -0,0 +1,42 @@
+ import numpy as np
+ import torch.utils.data as data
+
+
+ class DijkprofileDataset(data.Dataset):
+     """Pytorch custom dataset class to use with the pytorch dataloader."""
+
+     def __init__(self, profile_dict, partition, custom_scaler_path=None):
+         """Dijkprofile Dataset, provides profiles and labels to pytorch model.
+
+         Args:
+             profile_dict (dict): dict containing the profiles and labels
+             partition (list): list used to split the dataset into train and test
+                 sets. list contains ids to use for this dataset, format is
+                 as returned by sklearn.model_selection.train_test_split
+         """
+         self.data_dict = profile_dict
+         self.list_IDs = partition
+
+         print("scaler in dataset class is deprecated and moved to preprocessing")
+         # load scaler
+         # if custom_scaler_path:
+         #     self.scaler = joblib.load(custom_scaler_path)
+         # else:
+         #     self.scaler = joblib.load(os.path.join(dir, config.SCALER_PATH))
+         # # rescale all profiles
+         # for key in profile_dict.keys():
+         #     profile_dict[key]['profile'] = self.scaler.transform(
+         #         profile_dict[key]['profile'].reshape(-1, 1)).reshape(-1)
+         #     profile_dict[key]['profile'] = profile_dict[key]['profile'] / 10
+
+     def __len__(self):
+         return len(self.list_IDs)
+
+     def __getitem__(self, index):
+         id = self.list_IDs[index]
+         X = self.data_dict[id]['profile'].reshape(1, -1).astype(np.float32)
+         y = self.data_dict[id]['label'].reshape(1, -1)
+         return X, y
+
+     def __str__(self):
+         return "<Dijkprofile dataset: datapoints={}>".format(len(self.list_IDs))
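
For reference, a sketch of how this dataset is typically wrapped in a DataLoader; the split mirrors what `preprocessing.load_datasets` does, and `profile_dict` is assumed to come from the preprocessing step:

    from sklearn.model_selection import train_test_split
    from torch.utils.data import DataLoader
    from dijkprofile_annotator.dataset import DijkprofileDataset

    ids = list(profile_dict.keys())                     # profile_dict assumed from preprocessing
    train_ids, test_ids = train_test_split(ids, shuffle=True, test_size=0.2)

    train_loader = DataLoader(DijkprofileDataset(profile_dict, train_ids), batch_size=32, shuffle=True)
    for X, y in train_loader:
        # X: (batch, 1, profile_length) float32 heights, y: (batch, 1, profile_length) labels
        break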
dijkprofile-annotator/dijkprofile_annotator/models/__init__.py ADDED
@@ -0,0 +1 @@
+ from .dijknet import Dijknet
dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (255 Bytes).
 
dijkprofile-annotator/dijkprofile_annotator/models/__pycache__/dijknet.cpython-39.pyc ADDED
Binary file (3.86 kB).
 
dijkprofile-annotator/dijkprofile_annotator/models/dijknet.py ADDED
@@ -0,0 +1,109 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from dijkprofile_annotator.utils import extract_img
4
+
5
+
6
+ class Double_conv(nn.Module):
7
+ '''(Conv1d => ReLU) * 3 => Dropout'''
8
+ def __init__(self, in_ch, out_ch, p):
9
+ """
10
+ Args:
11
+ in_ch(int) : input channel
12
+ out_ch(int) : output channel
13
+ """
14
+ super(Double_conv, self).__init__()
15
+ self.conv = nn.Sequential(
16
+ nn.Conv1d(in_ch, out_ch, 3, padding=1, stride=1),
17
+ nn.ReLU(inplace=True),
18
+ nn.Conv1d(out_ch, out_ch, 5, padding=2, stride=1),
19
+ nn.ReLU(inplace=True),
20
+ nn.Conv1d(out_ch, out_ch, 7, padding=3, stride=1),
21
+ nn.ReLU(inplace=True),
22
+ nn.Dropout(p=p)
23
+ )
24
+ def forward(self, x):
25
+ x = self.conv(x)
26
+ return x
27
+
28
+
29
+ class Conv_down(nn.Module):
30
+ '''(conv => ReLU) * 2 => MaxPool2d'''
31
+
32
+ def __init__(self, in_ch, out_ch, p):
33
+ """
34
+ Args:
35
+ in_ch(int) : input channel
36
+ out_ch(int) : output channel
37
+ """
38
+ super(Conv_down, self).__init__()
39
+ self.conv = Double_conv(in_ch, out_ch, p)
40
+ self.pool = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
41
+
42
+ def forward(self, x):
43
+ x = self.conv(x)
44
+ pool_x = self.pool(x)
45
+ return pool_x, x
46
+
47
+
48
+ class Conv_up(nn.Module):
49
+ '''ConvTranspose1d upsample => concat skip connection => Double_conv'''
50
+
51
+ def __init__(self, in_ch, out_ch, p):
52
+ """
53
+ Args:
54
+ in_ch(int) : input channel
55
+ out_ch(int) : output channel
56
+ """
57
+ super(Conv_up, self).__init__()
58
+ self.up = nn.ConvTranspose1d(in_ch, out_ch, kernel_size=2, stride=2)
59
+ self.conv = Double_conv(in_ch, out_ch, p)
60
+
61
+ def forward(self, x1, x2):
62
+ x1 = self.up(x1)
63
+ x1_dim = x1.size()[2]
64
+ x2 = extract_img(x1_dim, x2)
65
+ x1 = torch.cat((x1, x2), dim=1)
66
+ x1 = self.conv(x1)
67
+ return x1
68
+
69
+
70
+ class Dijknet(nn.Module):
71
+ """Dijknet convolutional neural network. 1D Unet variant."""
72
+
73
+ def __init__(self, in_channels, out_channels, p=0.25):
74
+ """Dijknet convolutional neural network, 1D Unet Variant. Model is probably a bit too big
75
+ for what it needs to do, but it seems to work just fine.
76
+
77
+ Args:
78
+ in_channels (int): number of input channels, should be 1
79
+ out_channels (int): number of output channels/classes
80
+ p (float, optional): dropout chance for the dropout layers. Defaults to 0.25.
81
+ """
82
+ super(Dijknet, self).__init__()
83
+ self.Conv_down1 = Conv_down(in_channels, 64, p)
84
+ self.Conv_down2 = Conv_down(64, 128, p)
85
+ self.Conv_down3 = Conv_down(128, 256, p)
86
+ self.Conv_down4 = Conv_down(256, 512, p)
87
+ self.Conv_down5 = Conv_down(512, 1024, p)
88
+ self.Conv_up1 = Conv_up(1024, 512, p)
89
+ self.Conv_up2 = Conv_up(512, 256, p)
90
+ self.Conv_up3 = Conv_up(256, 128, p)
91
+ self.Conv_up4 = Conv_up(128, 64, p)
92
+ self.Conv_up5 = Conv_up(128, 64, p)
93
+ self.Conv_out = nn.Conv1d(64, out_channels, 1, padding=0, stride=1)
94
+ self.Conv_final = nn.Conv1d(out_channels, out_channels, 1, padding=0, stride=1)
95
+
96
+ def forward(self, x):
97
+ x, conv1 = self.Conv_down1(x)
98
+ x, conv2 = self.Conv_down2(x)
99
+ x, conv3 = self.Conv_down3(x)
100
+ x, conv4 = self.Conv_down4(x)
101
+ _, x = self.Conv_down5(x)
102
+ x = self.Conv_up1(x, conv4)
103
+ x = self.Conv_up2(x, conv3)
104
+ x = self.Conv_up3(x, conv2)
105
+ x = self.Conv_up4(x, conv1)
106
+ # final upscale to true size
107
+ x = self.Conv_out(x)
108
+ x = self.Conv_final(x)
109
+ return x
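
A quick shape check for the network defined above (a sketch; 512 is the default profile length and halves cleanly through the pooling stages, and 6 output classes correspond to the 'simple' mapping):

    import torch
    from dijkprofile_annotator.models import Dijknet

    model = Dijknet(in_channels=1, out_channels=6)
    model.eval()

    x = torch.randn(4, 1, 512)       # (batch, channels, profile_length)
    with torch.no_grad():
        out = model(x)
    print(out.shape)                 # torch.Size([4, 6, 512]); argmax over dim=1 gives per-point classes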
dijkprofile-annotator/dijkprofile_annotator/preprocessing/__init__.py ADDED
@@ -0,0 +1,8 @@
+ from .preprocessing import filepath_pair_to_labeled_sample
+ from .preprocessing import file_pairs_to_tensor_profiles
+ from .preprocessing import read_charpoints_file
+ from .preprocessing import read_surfaceline_file
+ from .preprocessing import make_height_profiles
+ from .preprocessing import make_labeled_height_profiles
+ from .preprocessing import get_file_pairs_from_dir
+ from .preprocessing import load_datasets
dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (571 Bytes).
 
dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/link_collector.cpython-39.pyc ADDED
Binary file (1.02 kB).
 
dijkprofile-annotator/dijkprofile_annotator/preprocessing/__pycache__/preprocessing.cpython-39.pyc ADDED
Binary file (11.2 kB).
 
dijkprofile-annotator/dijkprofile_annotator/preprocessing/preprocessing.py ADDED
@@ -0,0 +1,355 @@
1
+ import csv
2
+ import os
3
+ from operator import itemgetter
4
+
5
+ import numpy as np
6
+ from dijkprofile_annotator.config import (CLASS_DICT_FULL, CLASS_DICT_REGIONAL,
7
+ CLASS_DICT_SIMPLE,
8
+ CLASS_DICT_SIMPLE_BERM,
9
+ CLASS_DICT_SIMPLE_SLOOT)
10
+ from dijkprofile_annotator.dataset import DijkprofileDataset
11
+ from sklearn.model_selection import train_test_split
12
+
13
+
14
+ def read_surfaceline_file(surfaceline_fp):
15
+ """Read surfaceline file and convert to dict.
16
+
17
+ Args:
18
+ surfaceline_fp (string): path to the surfacelines file.
19
+
20
+ Returns:
21
+ dict: dict containing list of points per location.
22
+ """
23
+ # read the coordinates and collect to surfaceline_dict
24
+ surfacelines = {}
25
+ with open(surfaceline_fp) as csvfile:
26
+ surfacereader = csv.reader(csvfile, delimiter=';', quotechar='|')
27
+ next(surfacereader) # skip header
28
+ # print("header: {}".format(header)) # not very useful
29
+ stop_exec = False
30
+ for row in surfacereader:
31
+ if stop_exec:
32
+ break
33
+ location = row[0]
34
+ surfacelines[location] = []
35
+ for i in range(1, len(row)-2, 3):
36
+ # some files have empty points
37
+ if row[i] == '' or row[i+1] == '' or row[i+2] == '':
38
+ continue
39
+ try:
40
+
41
+ x = _parse_coordinate(row[i].replace('"', ''))
42
+ y = _parse_coordinate(row[i+1].replace('"', ''))
43
+ z = _parse_coordinate(row[i+2].replace('"', ''))
44
+ surfacelines[location].append((x, y, z))
45
+ except ValueError as e:
46
+ print(f"error reading point from surfaceline at location: {location} (index: {i}), error: {e}")
47
+ stop_exec = True
48
+ break
49
+ return surfacelines
50
+
51
+
52
+ def read_charpoints_file(charlines_fp):
53
+ """Read characteristicpoints file and convert to dict.
54
+
55
+ Args:
56
+ charlines_fp (string): path to characteristicpoints file.
57
+
58
+ Returns:
59
+ dict: dict containing list of points per location.
60
+ """
61
+ charpoints = {}
62
+ with open(charlines_fp) as csvfile:
63
+ cpointsreader = csv.reader(csvfile, delimiter=';', quotechar='|')
64
+ header = next(cpointsreader)
65
+ stop_exec = False
66
+ for idx, row in enumerate(cpointsreader):
67
+ if stop_exec:
68
+ break
69
+ try:
70
+ location = row[0]
71
+ except IndexError as e:
72
+ print(f"couldn't read location in row: {row} at {idx}, file: {charlines_fp}")
73
+ point_dict = {}
74
+ for i in range(1, len(row)-2, 3):
75
+ if row[i] == '' or row[i+1] == '' or row[i+2] == '':
76
+ continue
77
+ try:
78
+ x = _parse_coordinate(row[i].replace('"', ''))
79
+ y = _parse_coordinate(row[i+1].replace('"', ''))
80
+ z = _parse_coordinate(row[i+2].replace('"', ''))
81
+
82
+ point_dict[header[i][2:]] = (x, y, z)
83
+ except ValueError as e:
84
+ print(
85
+ f"error reading point from characteristicpoints at location: {location} (index: {i}), error: {e}")
86
+ stop_exec = True
87
+
88
+ charpoints[location] = point_dict
89
+ return charpoints
90
+
91
+
92
+ def _parse_coordinate(coord):
93
+ """Convert string point coordinate to float, remove double dots if needed.
94
+ Some of the coordinates contain multiple dots, probably because someone
95
+ opened the file in excel and it formatted it weird. In all examples I've
96
+ seen the first point is only to indicate 1000's and can safely be removed.
97
+
98
+ Args:
99
+ coord (str): string representation of the coordinate to parse
100
+
101
+ Returns:
102
+ float: float representation of the coordinate
103
+ """
104
+ try:
105
+ return float(coord)
106
+ except:
107
+ parts = coord.split(".")
108
+ return float("".join(parts[:-1]) + "." + parts[-1])
109
+
110
+
111
+ def make_height_profiles(surfaceline_dict, max_profile_size):
112
+ """Make height arrays from surfacelines dict.
113
+
114
+ Args:
115
+ surfaceline_dict (dict): dict of surfacelines by location.
116
+ max_profile_size (int): fixed max size for the height profile.
117
+
118
+ Returns:
119
+ dict: dict containing height profiles by location.
120
+ """
121
+ profile_dict = {}
122
+ for location in surfaceline_dict.keys():
123
+ heights = np.array(surfaceline_dict[location])[:, 2].astype(np.float32)
124
+
125
+ # we'll fit whole profile in a fixed length so that multiple profiles can be used as samples
126
+ z_tmp = np.zeros(max_profile_size)
127
+ profile_length = heights.shape[0]
128
+ if profile_length < max_profile_size:
129
+ z_tmp[:profile_length] = np.array(heights, dtype=np.float32)[:profile_length]
130
+ z_tmp[profile_length:] = heights[profile_length-1]
131
+ heights = z_tmp
132
+ else:
133
+ heights = heights[:max_profile_size]
134
+ profile_dict[location] = {"profile": heights}
135
+ return profile_dict
136
+
137
+
138
+ def make_labeled_height_profiles(surfaceline_dict, cpoints_dict, max_profile_size, class_list='simple', require_all_points=True):
139
+ """Make height profile and labels from surfacelines and cpoints.
140
+
141
+ Args:
142
+ surfaceline_dict (dict): dict of surfacelines by location.
143
+ cpoints_dict (dict): dict of characteristic points by location.
144
+ max_profile_size (int): fixed max size for the height profile.
145
+ class_list (bool): selection of classes to use, see config.
146
+ require_all_points: filter profiles that do not contain all the points in the class_list.
147
+
148
+ Returns:
149
+ dict: dict containing height profiles and their labels by location.
150
+ """
151
+ profile_label_dict = {}
152
+
153
+ class_list = class_list.lower()
154
+ class_dict = {}
155
+ if class_list == 'regional':
156
+ class_dict = CLASS_DICT_REGIONAL
157
+ elif class_list == 'simple':
158
+ class_dict = CLASS_DICT_SIMPLE
159
+ elif class_list == 'berm':
160
+ class_dict = CLASS_DICT_SIMPLE_BERM
161
+ elif class_list == 'sloot':
162
+ class_dict = CLASS_DICT_SIMPLE_SLOOT
163
+ elif class_list == 'full':
164
+ class_dict = CLASS_DICT_FULL
165
+ else:
166
+ raise NotImplementedError(f"No class list available of type: {class_list}")
167
+
168
+ required_point_types = list(class_dict.keys())
169
+ required_point_types.remove('leeg')  # we don't want to require the empty class
170
+
171
+ for location in surfaceline_dict.keys():
172
+ heights = np.array(surfaceline_dict[location])[:, 2].astype(np.float32)
173
+ labels = np.zeros(len(heights))
174
+
175
+ # if no labels were given for this location, skip it
176
+ if not location in cpoints_dict.keys():
177
+ # print(f"location not in cpoints dict, {location}")
178
+ continue
179
+
180
+ # skip the location if the required points are not all present
181
+ if require_all_points:
182
+ labeled_point_types = [key for key, value in cpoints_dict[location].items() if value != (-1.0, -1.0, -1.0)]
183
+ if not all([point_type in labeled_point_types for point_type in required_point_types]):
184
+ # print(f"not all point types present, missing {set(required_point_types) - set(labeled_point_types)}")
185
+ continue
186
+
187
+ for i, (key, point) in enumerate(cpoints_dict[location].items()):
188
+ # if the point is not empty, find the nearest point in the surface file,
189
+ # problems with rounding errors require matching by distance per point
190
+ if point == (-1.0, -1.0, -1.0):
191
+ continue
192
+
193
+ distances = []
194
+ for idx, surfacepoint in enumerate(surfaceline_dict[location]):
195
+ dist = np.linalg.norm(np.array(surfacepoint)-np.array(point))
196
+ distances.append((idx, dist))
197
+ (idx, dist) = sorted(distances, key=itemgetter(1))[0]
198
+ if key in class_dict:
199
+ labels[idx] = class_dict[key]
200
+
201
+ # forward fill the labels
202
+ for i in range(1, len(labels)):
203
+ if labels[i] == 0.0:
204
+ labels[i] = labels[i-1]
205
+
206
+ # we'll fit whole profile in a fixed length so that multiple profiles can be used as samples
207
+ z_tmp = np.zeros(max_profile_size)
208
+ labels_tmp = np.zeros(max_profile_size)
209
+ profile_length = labels.shape[0]
210
+ if profile_length < max_profile_size:
211
+ z_tmp[:profile_length] = np.array(heights, dtype=np.float32)[:profile_length]
212
+ labels_tmp[:profile_length] = np.array(labels)[:profile_length]
213
+ z_tmp[profile_length:] = heights[profile_length-1]
214
+ labels_tmp[profile_length:] = labels[profile_length-1]
215
+ heights = z_tmp
216
+ labels = labels_tmp
217
+ else:
218
+ heights = heights[:max_profile_size]
219
+ labels = labels[:max_profile_size]
220
+
221
+ # rescale every profile to between -1 and 1
222
+ # scaler = MinMaxScaler(feature_range=(-1, 1))
223
+ # heights = scaler.fit_transform(heights.reshape(-1, 1))
224
+
225
+ profile_label_dict[location] = {}
226
+ profile_label_dict[location]['profile'] = heights.astype(np.float32)
227
+ profile_label_dict[location]['label'] = labels.astype(np.int32)
228
+ return profile_label_dict
229
+
230
+
231
+ def filepath_pair_to_labeled_sample(source_surfacelines, source_characteristicpoints, max_profile_size=352, class_list='simple', require_all_points=True):
232
+ """Convert pair of surfacelines and characteristicpoints filepaths to format suited for machine learning.
233
+
234
+ Args:
235
+ source_surfacelines (string): path to the surfacelines file.
236
+ source_characteristicpoints (string): path to the characteristicpoints file.
237
+ max_profile_size (int, optional): max size for the profile. Defaults to 352.
238
+ class_list (str, optional): class mapping to use, see config. Defaults to 'simple'.
239
+
240
+ Returns:
241
+ dict: dict containing height profile and labels by location.
242
+ """
243
+ surfaceline_dict = read_surfaceline_file(source_surfacelines)
244
+ cpoints_dict = read_charpoints_file(source_characteristicpoints)
245
+
246
+ profile_label_dict = make_labeled_height_profiles(
247
+ surfaceline_dict,
248
+ cpoints_dict,
249
+ max_profile_size,
250
+ class_list=class_list,
251
+ require_all_points=require_all_points)
252
+ return profile_label_dict
253
+
254
+
255
+ def file_pairs_to_tensor_profiles(filepair_list, max_profile_size=352, class_list='simple', require_all_points=True):
256
+ """Convert list of pairs of surfacelines and characteristicpoints to format suited for machine learning.
257
+
258
+ Args:
259
+ filepair_list (list): list of tuples containing the paths to the surfacelines and characteristicpoints files.
260
+ max_profile_size (int, optional): max size for the profile. Defaults to 352.
261
+ class_list (str, optional): class mapping to use, see config. Defaults to 'simple'.
262
+
263
+ Returns:
264
+ dict: Dict containing all the height profiles and labels by location.
265
+ """
266
+ all_profiles = {}
267
+ for source_surfacelines, source_characteristicpoints in filepair_list:
268
+ profile_label_dict = filepath_pair_to_labeled_sample(
269
+ source_surfacelines,
270
+ source_characteristicpoints,
271
+ max_profile_size,
272
+ class_list,
273
+ require_all_points=require_all_points)
274
+ for key, value in profile_label_dict.items():
275
+ all_profiles[key] = value
276
+ return all_profiles
277
+
278
+
279
+ def get_file_pairs_from_dir(path, krp_format=False):
280
+ """Recursively get all pairs of lines and points files in a directory.
281
+
282
+ Args:
283
+ path (str): path to the root directory containing the lines and points csv files,
284
+ directory is searched recursively for pairs.
285
+ krp_format (bool): Indicates that the folder contains csv files in the naming convention used by
286
+ waterschap Vallei en Veluwe.
287
+
288
+ Returns:
289
+ list: list of tuples where the first item is the path to the surfacelines.csv and the second
290
+ the path to the characteristicpoints.csv
291
+ """
292
+ if krp_format:
293
+ return _get_file_pairs_from_dir_krp(path)
294
+ list_of_files = []
295
+ for (dirpath, _, filenames) in os.walk(path):
296
+ for filename in filenames:
297
+ if filename.endswith('lines.csv'):
298
+ if os.path.exists(os.sep.join([dirpath, filename])) and \
299
+ os.path.exists(os.sep.join([dirpath, 'characteristicpoints.csv'])):
300
+
301
+ list_of_files.append((
302
+ os.sep.join([dirpath, filename]),
303
+ os.sep.join([dirpath, 'characteristicpoints.csv'])))
304
+ return list_of_files
305
+
306
+
307
+ def _get_file_pairs_from_dir_krp(path):
308
+ """Recursively get all pairs of lines and points files in a directory but in the format used
309
+ by Waterschap Vallei en Veluwe, same functionality as get_file_pairs_from_dir.
310
+
311
+ Args:
312
+ path (str): path to the root directory containing the lines and points csv files,
313
+ directory is searched recursively for pairs
314
+
315
+ Returns:
316
+ list: list of tuples where the first item is the path to the surfacelines.csv and the second
317
+ the path to the characteristicpoints.csv
318
+ """
319
+ list_of_files = []
320
+ for (dirpath, _, filenames) in os.walk(path):
321
+ for filename in filenames:
322
+ if filename.endswith('.krp.csv'):
323
+ if os.path.exists(os.sep.join([dirpath, filename])) and \
324
+ os.path.exists(os.sep.join([dirpath, filename.split(".krp")[0] + ".csv"])):
325
+
326
+ list_of_files.append((
327
+ os.sep.join([dirpath, filename.split(".krp")[0] + ".csv"]),
328
+ os.sep.join([dirpath, filename])))
329
+ return list_of_files
330
+
331
+
332
+ def load_datasets(annotation_tuples, custom_scaler_path=None, test_size=0.2, max_profile_size=512, class_list='simple', require_all_points=True):
333
+ """Load datasets given list of annotation tuples.
334
+
335
+ Args:
336
+ annotation_tuples ([(str,str)]): list of tuples of filepaths to the lines and points files.
337
+ custom_scaler_path (str, optional): path to a custom scaler to rescale the data. Defaults to None.
338
+ test_size (float, optional): Test size for the training. Defaults to 0.2.
339
+ max_profile_size (int, optional): max profile size. Defaults to 512.
340
+ class_list (str, optional): class_mapping/class_list to use. Defaults to 'simple'.
341
+ require_all_points (bool, optional): whether to drop profiles that don't contain all points in the mapping. Defaults to True.
342
+
343
+ Returns:
344
+ DijkprofileDataset, DijkprofileDataset: train and test dataset classes
345
+ """
346
+ profile_dict = file_pairs_to_tensor_profiles(annotation_tuples, max_profile_size=max_profile_size, class_list=class_list, require_all_points=require_all_points)
347
+
348
+ # construct dataloaders
349
+ id_list = list(profile_dict.keys())
350
+ [train, test] = train_test_split(id_list, shuffle=True, test_size=test_size)
351
+
352
+ dataset_train = DijkprofileDataset(profile_dict, train, custom_scaler_path=custom_scaler_path)
353
+ dataset_validation = DijkprofileDataset(profile_dict, test, custom_scaler_path=custom_scaler_path)
354
+
355
+ return dataset_train, dataset_validation
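
Putting the helpers above together, a sketch of building train/validation datasets from a directory of annotated pairs (the directory path is a placeholder):

    from dijkprofile_annotator import preprocessing

    # recursively collect (surfacelines, characteristicpoints) csv pairs below a root folder
    pairs = preprocessing.get_file_pairs_from_dir("data/annotated_profiles")   # placeholder path

    train_set, val_set = preprocessing.load_datasets(
        pairs,
        test_size=0.2,
        max_profile_size=512,
        class_list="simple",
        require_all_points=True,
    )
    print(train_set, val_set)   # <Dijkprofile dataset: datapoints=...>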
dijkprofile-annotator/dijkprofile_annotator/requirements.txt ADDED
@@ -0,0 +1,7 @@
+ joblib==1.1.0
+ matplotlib==3.5.0
+ numpy==1.21.4
+ Pillow==8.4.0
+ scikit_learn==1.0.1
+ seaborn==0.11.2
+ torch==1.10.0
dijkprofile-annotator/dijkprofile_annotator/training/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .train import train
+ from .train import get_loss_train
+ from .train import accuracy_check
+ from .train import accuracy_check_for_batch
dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (367 Bytes).
 
dijkprofile-annotator/dijkprofile_annotator/training/__pycache__/train.cpython-39.pyc ADDED
Binary file (5.97 kB).
 
dijkprofile-annotator/dijkprofile_annotator/training/train.py ADDED
@@ -0,0 +1,219 @@
1
+ import dijkprofile_annotator.preprocessing as preprocessing
2
+ import dijkprofile_annotator.utils as utils
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ from dijkprofile_annotator.models import Dijknet
7
+ from PIL import Image
8
+ from torch.utils.data import DataLoader
9
+ from tqdm import tqdm
10
+
11
+
12
+ def get_loss_train(model, data_train, criterion):
13
+ """Compute accuracy and loss over the train set.
14
+
15
+ Args:
16
+ model (nn.Module): model to use for prediction
17
+ data_train (torch.utils.data.DataLoader)): Dataloader containing the profiles
18
+ and labels
19
+ criterion (pytorch loss function, probably nn.CrossEntropyLoss): loss function to be used.
20
+
21
+ Returns:
22
+ float: total accuracy
23
+ float: total loss
24
+ """
25
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
26
+ model.eval()
27
+ total_acc = 0
28
+ total_loss = 0
29
+ for batch, (profile, masks) in enumerate(data_train):
30
+ with torch.no_grad():
31
+ profile = torch.Tensor(profile).to(device)
32
+ masks = torch.Tensor(masks).to(device)
33
+ outputs = model(profile)
34
+ loss = criterion(outputs, masks)
35
+ preds = torch.argmax(outputs, dim=1).float()
36
+ acc = accuracy_check_for_batch(masks.cpu(), preds.cpu(), profile.size()[0])
37
+ total_acc = total_acc + acc
38
+ total_loss = total_loss + loss.cpu().item()
39
+ return total_acc/(batch+1), total_loss/(batch + 1)
40
+
41
+
42
+ def accuracy_check(mask, prediction):
43
+ """check accuracy of prediciton.
44
+
45
+ Args:
46
+ mask (torch.Tensor, PIL Image or str): labels
47
+ prediction (torch.Tensor, PIL Image or str): predictions
48
+
49
+ Returns:
50
+ float: accuracy of prediction given mask.
51
+ """
52
+ ims = [mask, prediction]
53
+ np_ims = []
54
+ for item in ims:
55
+ if 'str' in str(type(item)):
56
+ item = np.array(Image.open(item))
57
+ elif 'PIL' in str(type(item)):
58
+ item = np.array(item)
59
+ elif 'torch' in str(type(item)):
60
+ item = item.numpy()
61
+ np_ims.append(item)
62
+
63
+ compare = np.equal(np_ims[0], np_ims[1])
64
+ accuracy = np.sum(compare)
65
+
66
+ return accuracy/len(np_ims[0].flatten())
67
+
68
+
69
+ def accuracy_check_for_batch(masks, predictions, batch_size):
70
+ """check accuracy of prediciton given mask.
71
+
72
+ Args:
73
+ masks (torch.Tensor): labels
74
+ predictions (torch.Tensor): predictions
75
+ batch_size (int): batch size of prediciton/mask.
76
+
77
+ Returns:
78
+ float: accuracy of prediction given mask.
79
+ """
80
+ total_acc = 0
81
+ for index in range(batch_size):
82
+ total_acc += accuracy_check(masks[index], predictions[index])
83
+ return total_acc/batch_size
84
+
85
+
86
+ def train(annotation_tuples,
87
+ epochs=100,
88
+ batch_size_train=32,
89
+ batch_size_val=512,
90
+ num_workers=6,
91
+ custom_scaler_path=None,
92
+ class_list='simple',
93
+ test_size=0.2,
94
+ max_profile_size=512,
95
+ shuffle=True):
96
+ """Train a Dijknet model on the given annotation file pairs.
+
+ Args:
+ annotation_tuples (list): list of (surfacelines, characteristicpoints) filepath tuples.
+ epochs (int, optional): number of training epochs. Defaults to 100.
+ batch_size_train (int, optional): batch size for training. Defaults to 32.
+ batch_size_val (int, optional): batch size for validation. Defaults to 512.
+ num_workers (int, optional): number of dataloader workers. Defaults to 6.
+ custom_scaler_path (str, optional): path to a custom scaler. Defaults to None.
+ class_list (str, optional): class mapping to use, see config. Defaults to 'simple'.
+ test_size (float, optional): fraction of samples used for validation. Defaults to 0.2.
+ max_profile_size (int, optional): fixed profile length. Defaults to 512.
+ shuffle (bool, optional): shuffle training batches. Defaults to True.
+
+ Raises:
+ NotImplementedError: when given class_list is not implemented
+
+ Returns:
+ Dijknet: trained Dijknet model.
+ """
117
+ train_dataset, test_dataset = preprocessing.load_datasets(annotation_tuples,
118
+ custom_scaler_path=custom_scaler_path,
119
+ test_size=test_size,
120
+ max_profile_size=max_profile_size)
121
+ print(f"loaded datasets:")
122
+ print(f" train: {len(train_dataset)} samples")
123
+ print(f" test: {len(test_dataset)} samples")
124
+
125
+ class_dict, _, class_weights = utils.get_class_dict(class_list)
126
+
127
+ print(f"constructing model with {len(class_dict)} output classes")
128
+ model = Dijknet(1, len(class_dict))
129
+
130
+ # parameters
131
+ train_params = {'batch_size': batch_size_train,
132
+ 'shuffle': shuffle,
133
+ 'num_workers': num_workers}
134
+
135
+ params_val = {'batch_size': batch_size_val,
136
+ 'shuffle': False,
137
+ 'num_workers': num_workers}
138
+
139
+ training_generator = DataLoader(train_dataset, **train_params)
140
+ validation_generator = DataLoader(test_dataset, **params_val)
141
+
142
+ # CUDA for PyTorch
143
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
144
+ model = model.to(device)
145
+
146
+ # loss
147
+ criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor(class_weights).to(device))
148
+
149
+ # Optimizer
150
+ optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
151
+
152
+ print("starting training.")
153
+ # Loop over epochs
154
+ for epoch in range(epochs):
155
+ print("epoch: {}".format(epoch))
156
+ # Training
157
+ loss_list = []
158
+ model.train()
159
+ for local_batch, local_labels in tqdm(training_generator):
160
+ # bug with dataloader, it doesn't return the right size batch when it runs out of samples
161
+ if not local_labels.shape[0] == train_params['batch_size']:
162
+ continue
163
+
164
+ # Transfer to GPU
165
+ local_batch, local_labels = local_batch.to(device), local_labels.to(device).long()
166
+
167
+ # Model computations
168
+ outputs = model(local_batch)
169
+ local_labels = local_labels.reshape(train_params['batch_size'], -1)
170
+
171
+ loss = criterion(outputs, local_labels)
172
+ optimizer.zero_grad()
173
+ loss.backward()
174
+
175
+ # Update weights
176
+ optimizer.step()
177
+ loss_list.append(loss.detach().cpu().numpy())
178
+
179
+ # report average loss over epoch
180
+ print("training loss: ", np.mean(loss_list))
181
+
182
+ # Validation
183
+ model.eval()
184
+ batch_accuracies = []
185
+ batch_accuracies_iso = []
186
+ batch_loss_val = []
187
+ for local_batch, local_labels in validation_generator:
188
+ # get new batches
189
+ local_batch, local_labels = local_batch.to(device), local_labels.to(device).long()
190
+
191
+ # Model computations
192
+ outputs = model(local_batch)
193
+
194
+ # calc loss
195
+ loss = criterion(outputs, local_labels.reshape(local_labels.shape[0], -1))
196
+ batch_loss_val.append(loss.detach().cpu().numpy())
197
+
198
+ outputs_iso = utils.force_sequential_predictions(outputs, method='isotonic')
199
+ outputs_first = utils.force_sequential_predictions(outputs, method='first')
200
+
201
+ # compute accuracy for whole validation set
202
+ flat_output = torch.argmax(outputs, dim=1).cpu().reshape(local_batch.shape[0], 1, -1)
203
+ compare = flat_output == local_labels.cpu()
204
+ acc = np.sum(compare.numpy(), axis=2) / \
205
+ int(local_batch.shape[-1]) # * params_val['batch_size']
206
+ batch_accuracies.append(np.mean(acc, axis=0)[0])
207
+
208
+ flat_output = torch.argmax(outputs_iso, dim=1).cpu().reshape(local_batch.shape[0], 1, -1)
209
+ compare = flat_output == local_labels.cpu()
210
+ acc = np.sum(compare.numpy(), axis=2) / \
211
+ int(local_batch.shape[-1]) # * params_val['batch_size']
212
+ batch_accuracies_iso.append(np.mean(acc, axis=0)[0])
213
+
214
+ print("validation accuracy: {}".format(np.mean(batch_accuracies)))
215
+ print("validation accuracy isotonic regression: {}".format(np.mean(batch_accuracies_iso)))
216
+ print("validation loss: {}".format(np.mean(batch_loss_val)))
217
+ print("="*50)
218
+
219
+ return model
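
And a sketch of a full training run with the function above; the path and epoch count are illustrative, and saving the weights with `torch.save` is not part of `train` itself:

    import torch
    from dijkprofile_annotator import preprocessing, training

    pairs = preprocessing.get_file_pairs_from_dir("data/annotated_profiles")   # placeholder path
    model = training.train(pairs, epochs=25, class_list="simple", max_profile_size=512)

    torch.save(model.state_dict(), "dijknet_custom.pt")                        # hypothetical output name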
dijkprofile-annotator/dijkprofile_annotator/utils/__init__.py ADDED
@@ -0,0 +1,9 @@
+ from .utils import extract_img
+ from .utils import ffill
+ from .utils import visualize_sample
+ from .utils import visualize_prediction
+ from .utils import visualize_files
+ from .utils import visualize_dict
+ from .utils import train_scaler
+ from .utils import get_class_dict
+ from .utils import force_sequential_predictions
dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (528 Bytes).
 
dijkprofile-annotator/dijkprofile_annotator/utils/__pycache__/utils.cpython-39.pyc ADDED
Binary file (10.3 kB).
 
dijkprofile-annotator/dijkprofile_annotator/utils/utils.py ADDED
@@ -0,0 +1,350 @@
1
+ import os
2
+ import random
3
+ from collections import defaultdict
4
+
5
+ import dijkprofile_annotator.preprocessing as preprocessing
6
+ import dijkprofile_annotator.config as config
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import seaborn as sns
10
+ import torch
11
+ import torch.nn.functional as F
12
+ from sklearn.isotonic import IsotonicRegression
13
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler
14
+
15
+
16
+ def extract_img(size, in_tensor):
17
+ """
18
+ Args:
19
+ size(int) : size of cut
20
+ in_tensor(tensor) : tensor to be cut
21
+ """
22
+ dim1 = in_tensor.size()[2]
23
+ in_tensor = in_tensor[:, :, int((dim1-size)/2):int((size + (dim1-size)/2))]
24
+ return in_tensor
25
+
26
+
27
+ def ffill(arr):
28
+ """Forward fill utility function.
29
+
30
+ Args:
31
+ arr (np.array): numpy array to fill
32
+
33
+ Returns:
34
+ np.array: filled array.
35
+ """
36
+ mask = np.isnan(arr)
37
+ idx = np.where(~mask, np.arange(mask.shape[1]), 0)
38
+ np.maximum.accumulate(idx, axis=1, out=idx)
39
+ out = arr[np.arange(idx.shape[0])[:,None], idx]
40
+ return out
41
+
42
+ def train_scaler(profile_dict, scaler_type='minmax'):
43
+ """Train a scaler given a profile dict
44
+
45
+ Args:
46
+ profile_dict (dict): dict containing the profile heights and labels
47
+
48
+ Returns:
49
+ sklearn MinMaxScaler or StandardScaler: fitted scaler in sklearn format
50
+ """
51
+ if scaler_type == 'minmax':
52
+ scaler = MinMaxScaler(feature_range=(-1, 1)) # for neural networks -1,1 is better than 0,1
53
+ elif scaler_type == 'standard':
54
+ scaler = StandardScaler()
55
+ else:
56
+ raise NotImplementedError(f"no scaler: {scaler_type}")
57
+ randkey = random.choice(list(profile_dict.keys()))
58
+ accumulator = np.zeros((len(profile_dict), profile_dict[randkey]['profile'].shape[0]))
59
+
60
+ for i, key in enumerate(profile_dict.keys()):
61
+ accumulator[i, :] = profile_dict[key]['profile']
62
+
63
+ scaler.fit(accumulator.reshape(-1, 1))
64
+ return scaler
65
+
66
+
67
+ def get_class_dict(class_list):
68
+ """Get correct class dicts and weights from config.
69
+
70
+ Args:
71
+ class_list (string): string representing the class mappings to use
72
+
73
+ Raises:
74
+ NotImplementedError: raise if an not implemented class mapping is passed
75
+
76
+ Returns:
77
+ (dict,dict,list): dict with class mappings, inverse of that dict, weights for each class.
78
+ """
79
+ class_list = class_list.lower()
80
+ if class_list == 'regional':
81
+ class_dict = config.CLASS_DICT_REGIONAL
82
+ inverse_class_dict = config.INVERSE_CLASS_DICT_REGIONAL
83
+ class_weights = config.WEIGHT_DICT_REGIONAL
84
+ elif class_list == 'simple':
85
+ class_dict = config.CLASS_DICT_SIMPLE
86
+ class_weights = config.WEIGHT_DICT_SIMPLE
87
+ inverse_class_dict = config.INVERSE_CLASS_DICT_SIMPLE
88
+ elif class_list == 'berm':
89
+ class_dict = config.CLASS_DICT_SIMPLE_BERM
90
+ class_weights = config.WEIGHT_DICT_SIMPLE_BERM
91
+ inverse_class_dict = config.INVERSE_CLASS_DICT_SIMPLE_BERM
92
+ elif class_list == 'sloot':
93
+ class_dict = config.CLASS_DICT_SIMPLE_SLOOT
94
+ class_weights = config.WEIGHT_DICT_SIMPLE_SLOOT
95
+ inverse_class_dict = config.INVERSE_CLASS_DICT_SIMPLE_SLOOT
96
+ elif class_list == 'full':
97
+ class_dict = config.CLASS_DICT_FULL
98
+ class_weights = config.WEIGHT_DICT_FULL
99
+ inverse_class_dict = config.INVERSE_CLASS_DICT_FULL
100
+ else:
101
+ raise NotImplementedError(f"No configs found for class list of type: {class_list}")
102
+ return class_dict, inverse_class_dict, class_weights
103
+
104
+
105
+ def force_sequential_predictions(predictions, method='isotonic'):
106
+ """Force the classes in the sample to always go up from left to right. This
107
+ makes sense because a higher class could never be left of a lower class in the
108
+ representation chosen here. Two methods are available, Isotonic Regression and
109
+ a group first method. I would use the Isotonic regression.
110
+
111
+ Args:
112
+ predictions (torch.Tensor): Tensor output of the model in shape (batch_size, channel_size, sample_size)
113
+ method (str, optional): method to use for enforcing the sequentiality. Defaults to 'isotonic'.
114
+
115
+ Raises:
116
+ NotImplementedError: if the given method is not implemented
117
+
118
+ Returns:
119
+ torch.Tensor: Tensor in the same shape as the input, but with only non-decreasing classes from left to right.
120
+ """
121
+ predictions = predictions.detach().cpu()
122
+ n_classes = predictions.shape[1] # 1 is the channel dimension
123
+ if method == 'first':
124
+ # loop over batch
125
+ for j in range(predictions.shape[0]):
126
+ pred = torch.argmax(predictions[j], dim=0)
127
+
128
+ # construct dict of groups of start-end indices for class
129
+ groups = defaultdict(list)
130
+ current_class = pred[0]
131
+ group_start_idx = 0
132
+ for i in range(1, len(pred)):
133
+ if pred[i] != current_class:
134
+ groups[current_class.item()].append((group_start_idx, i))
135
+ group_start_idx = i
136
+ current_class = pred[i]
137
+
138
+ # if the class occurs again later in the profile
139
+ # discard this occurrence of it
140
+ new_pred = torch.zeros(len(pred))
141
+ last_index = 0
142
+ for class_n, group_tuples in sorted(groups.items()):
143
+ for group_tuple in group_tuples:
144
+ if group_tuple[0] >= last_index:
145
+ new_pred[group_tuple[0]:group_tuple[1]] = class_n
146
+ last_index = group_tuple[1]
147
+ break
148
+
149
+ # simple forward fill
150
+ for i in range(1, len(new_pred)):
151
+ if new_pred[i] == 0:
152
+ new_pred[i] = new_pred[i-1]
153
+
154
+ # encode back to one-hot tensor
155
+ predictions[j] = F.one_hot(new_pred.to(torch.int64), num_classes=n_classes).permute(1,0)
156
+ elif method == 'isotonic':
157
+ for i in range(predictions.shape[0]):
158
+ pred = torch.argmax(predictions[i], dim=0)
159
+
160
+ x = np.arange(0,len(pred))
161
+ iso_reg = IsotonicRegression().fit(x, pred)
162
+ new_pred = iso_reg.predict(x)
163
+ new_pred = np.round(new_pred)
164
+
165
+ # encode back to one-hot tensor
166
+ new_pred = F.one_hot(torch.Tensor(new_pred).to(torch.int64), num_classes=n_classes).permute(1,0)
167
+ predictions[i] = new_pred
168
+ else:
169
+ raise NotImplementedError(f"Unknown method: {method}")
170
+
171
+ return predictions
172
+
173
+
174
+
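A hedged sketch (editor's illustration) of the isotonic option on a toy prediction tensor; shapes follow the (batch, channels, length) convention described in the docstring:

import torch
import torch.nn.functional as F

classes = torch.tensor([0, 1, 1, 3, 2, 3, 3])                      # deliberately non-monotonic
one_hot = F.one_hot(classes, num_classes=4).permute(1, 0).float()  # (channels, length)
batch = one_hot.unsqueeze(0)                                       # (batch, channels, length)
cleaned = force_sequential_predictions(batch, method='isotonic')
print(torch.argmax(cleaned[0], dim=0))  # classes now never decrease from left to right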
175
+ def visualize_prediction(heights, prediction, labels, location_name, class_list):
176
+ """visualize a profile plus labels and prediction
177
+
178
+ Args:
179
+ heights (tensor): tensor containing the heights data of the profile
180
+ prediction (tensor): tensor containing the predicted data of the profile
181
+ labels (tensor): tensor containing the labels for each height point in heights
182
+ location_name (str): name of the profile, just for visualization
183
+ class_list (str): class mapping to use, determines which labels are visualized
184
+ """
185
+ class_dict, inverse_class_dict, _ = get_class_dict(class_list)
186
+ fig, ax = plt.subplots(figsize=(20,11))
187
+ plt.title(location_name)
188
+ plt.plot(heights, label='profile')
189
+
190
+ # change one-hot batched format to list of classes
191
+ if prediction.dim() == 3:
192
+ prediction = torch.argmax(torch.squeeze(prediction, dim=0), dim=0)
193
+ if prediction.dim() == 2:
194
+ # assuming channel first representation
195
+ prediction = torch.argmax(prediction, dim=0)
196
+ prediction = prediction.detach().cpu().numpy()
197
+
198
+ # ax.set_ylim(top=np.max(heights), bottom=np.min(heights))
199
+ label_height = np.min(heights)
200
+ n_labels = len(np.unique(labels))
201
+ label_height_distance = (np.max(heights) - np.min(heights))/(n_labels*2)
202
+
203
+ cmap = sns.color_palette("Set2", len(set(class_dict.values())))
204
+
205
+ # plot actual labels
206
+ prev_class_n = 999
207
+ for index, class_n in enumerate(labels):
208
+ if class_n == 0:
209
+ continue
210
+ if class_n != prev_class_n:
211
+ plt.axvline(index, 0,5, color=cmap[class_n], linestyle=(0,(5,10))) # loose dashes
212
+ plt.text(index, label_height, inverse_class_dict[class_n], rotation=0)
213
+ label_height += label_height_distance
214
+ prev_class_n = class_n
215
+
216
+ # plot predicted points
217
+ used_classes = []
218
+ prev_class_n = 999
219
+ for index, class_n in enumerate(prediction):
220
+ if class_n == 0 or class_n in used_classes:
221
+ continue
222
+ if class_n != prev_class_n:
223
+ plt.axvline(index, 0,5, color=cmap[class_n], linestyle=(0,(1,1))) # small dots
224
+ plt.text(index, label_height, "predicted " + inverse_class_dict[class_n], rotation=0)
225
+ label_height += label_height_distance
226
+ used_classes.append(prev_class_n)
227
+ prev_class_n = class_n
228
+
229
+ plt.show()
230
+
231
+
232
+ def visualize_sample(heights, labels, location_name, class_list):
233
+ """visualize a profile and labels.
234
+
235
+ Args:
236
+ heights (tensor): tensor containing the heights data of the profile
237
+ labels (tensor): tensor containing the labels for each height point in heights
238
+ location_name (str): name of the profile, just for visualization
239
+ class_list (str): class mapping to use, determines which labels are visualized
240
+ """
241
+ class_dict, inverse_class_dict, _ = get_class_dict(class_list)
242
+ fig, ax = plt.subplots(figsize=(20,11))
243
+ plt.title(location_name)
244
+ plt.plot(heights, label='profile')
245
+
246
+ # ax.set_ylim(top=np.max(heights), bottom=np.min(heights))
247
+ label_height = np.min(heights)
248
+ n_labels = len(np.unique(labels))
249
+ label_height_distance = (np.max(heights) - np.min(heights))/(n_labels*2)
250
+
251
+ cmap = sns.color_palette("Set2", len(set(class_dict.values())))
252
+
253
+ # plot actual labels
254
+ prev_class_n = 999
255
+ for index, class_n in enumerate(labels):
256
+ if class_n == 0:
257
+ continue
258
+ if class_n != prev_class_n:
259
+ plt.axvline(index, 0,5, color=cmap[class_n], linestyle=(0,(5,10))) # loose dashes
260
+ plt.text(index, label_height, inverse_class_dict[class_n], rotation=0)
261
+ label_height += label_height_distance
262
+ prev_class_n = class_n
263
+
264
+ plt.show()
265
+
266
+ def visualize_files(linesfp, pointsfp, max_profile_size=512, class_list='simple', location_index=0, return_dict=False):
267
+ """visualize profile lines and points filepaths.
268
+
269
+ Args:
270
+ linesfp (str): path to surfacelines file.
271
+ pointsfp (str): path to points file.
272
+ max_profile_size (int, optional): cutoff size of the profile; the default is usually fine. Defaults to 512.
273
+ class_list (str, optional): class mapping to use. Defaults to 'simple'.
274
+ location_index (int, optional): index of the profile to visualize. Defaults to 0.
275
+ return_dict (bool, optional): return the profile dict for faster visualization. Defaults to False.
276
+
277
+ Returns:
278
+ [dict, optional]: profile dict containing the profiles of the given files
279
+ """
280
+ profile_label_dict = preprocessing.filepath_pair_to_labeled_sample(linesfp,
281
+ pointsfp,
282
+ max_profile_size=max_profile_size,
283
+ class_list=class_list)
284
+
285
+ location_name = list(profile_label_dict.keys())[location_index]
286
+ heights = profile_label_dict[location_name]['profile']
287
+ labels = profile_label_dict[location_name]['label']
288
+
289
+ class_dict, inverse_class_dict, _ = get_class_dict(class_list)
290
+ fig, ax = plt.subplots(figsize=(20,11))
291
+ plt.title(location_name)
292
+ plt.plot(heights, label='profile')
293
+
294
+ label_height = np.min(heights)
295
+ n_labels = len(np.unique(labels))
296
+ label_height_distance = (np.max(heights) - np.min(heights))/(n_labels)
297
+
298
+ cmap = sns.color_palette("Set2", len(set(class_dict.values())))
299
+
300
+ # plot actual labels
301
+ prev_class_n = 999
302
+ for index, class_n in enumerate(labels):
303
+ if class_n == 0:
304
+ continue
305
+ if class_n != prev_class_n:
306
+ plt.axvline(index, 0,5, color=cmap[class_n], linestyle=(0,(5,10))) # loose dashes
307
+ plt.text(index, label_height, inverse_class_dict[class_n], rotation=0)
308
+ label_height += label_height_distance
309
+ prev_class_n = class_n
310
+
311
+ plt.show()
312
+
313
+ if return_dict:
314
+ return profile_label_dict
315
+
316
+ def visualize_dict(profile_label_dict, class_list='simple', location_index=0):
317
+ """visualise profile with labels from profile_dict, profile specified by index.
318
+
319
+ Args:
320
+ profile_label_dict (dict): dict containing profiles and labels
321
+ class_list (str, optional): class_mapping to use for visualization. Defaults to 'simple'.
322
+ location_index (int, optional): specifies the index of the profile to visualize. Defaults to 0.
323
+ """
324
+ location_name = list(profile_label_dict.keys())[location_index]
325
+ heights = profile_label_dict[location_name]['profile']
326
+ labels = profile_label_dict[location_name]['label']
327
+
328
+ class_dict, inverse_class_dict, _ = get_class_dict(class_list)
329
+ fig, ax = plt.subplots(figsize=(20,11))
330
+ plt.title(location_name)
331
+ plt.plot(heights, label='profile')
332
+
333
+ label_height = np.min(heights)
334
+ n_labels = len(np.unique(labels))
335
+ label_height_distance = (np.max(heights) - np.min(heights))/(n_labels)
336
+
337
+ cmap = sns.color_palette("Set2", len(set(class_dict.values())))
338
+
339
+ # plot actual labels
340
+ prev_class_n = 999
341
+ for index, class_n in enumerate(labels):
342
+ if class_n == 0:
343
+ continue
344
+ if class_n != prev_class_n:
345
+ plt.axvline(index, 0,5, color=cmap[class_n], linestyle=(0,(5,10))) # loose dashes
346
+ plt.text(index, label_height, inverse_class_dict[class_n], rotation=0)
347
+ label_height += label_height_distance
348
+ prev_class_n = class_n
349
+
350
+ plt.show()
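A hedged usage sketch (editor's illustration; the file paths are hypothetical placeholders) combining visualize_files and visualize_dict so the files are only parsed once:

profile_dict = visualize_files("surfacelines.csv", "characteristicpoints.csv",
                               class_list='simple', location_index=0, return_dict=True)
visualize_dict(profile_dict, class_list='simple', location_index=1)  # plot another location from the cached dict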
dijkprofile-annotator/dijkprofile_annotator/web/.gitkeep ADDED
File without changes
dijkprofile-annotator/dijkprofile_annotator/web/app.py ADDED
@@ -0,0 +1,45 @@
1
+ import os
2
+ import math
3
+ import gradio as gr
4
+ import dijkprofile_annotator
5
+ from zipfile import ZipFile
6
+
7
+ def annotate_file(file_objects, model_type):
8
+ # TODO: actually use different model types based on selected model, only a well trained dijk model is available now.
9
+ generated_charfiles = []
10
+ str1 = "Starting processing of files."
11
+ pad1 = math.floor((os.get_terminal_size().columns - len(str1)) / 2) * "="
12
+ print(pad1 + "Starting processing of files." + pad1)
13
+ for i, file_obj in enumerate(file_objects):
14
+ target_filepath = f"/tmp/characteristicpoints_{i}.csv"
15
+ print(f" Processing file '{file_obj.name}', model '{model_type}', saving to '{target_filepath}'")
16
+ dijkprofile_annotator.annotate(file_obj.name, target_filepath, device='cpu')
17
+ generated_charfiles.append(target_filepath)
18
+ print(f" finished processing: {file_obj.name}! saved to : {target_filepath}")
19
+ print(" ", "-" * (os.get_terminal_size().columns - 5))
20
+
21
+ print("finished with all processing!")
22
+ # return the csv file if only 1 file was given, return a zip otherwise.
23
+ if len(generated_charfiles) == 1:
24
+ print(f"returning file: {generated_charfiles[0]}")
25
+ return generated_charfiles[0]
26
+ else:
27
+ return_zipfile = "/tmp/characterist_points.zip"
28
+ with ZipFile(return_zipfile, 'w') as zipObj:
29
+ for filepath in generated_charfiles:
30
+ zipObj.write(filepath)
31
+ print(f"returning file: {return_zipfile}")
32
+ return return_zipfile
33
+
34
+ description = "Upload een surfacelines.csv bestand in QDAMEdit format en krijg een annotatie file in characteristicpoints format terug \n" +\
35
+ "Een neural netwerk gebaseerd op image segmentation heeft geleerd op basis van ~6000 geannoteerde profielen om zo goed mogelijk automatisch de punten te plaatsen op de profielen.\n" +\
36
+ "Er zijn meerdere modellen beschikbaar om de annotatie te genereren, het 'dijk' model probeert alleen de dijk te vinden, het 'dijk+sloot' model zoekt ook naar een sloot en het 'volledig' model " +\
37
+ "probeert zo veel mogelijk van de punten beschikbaar in het QDAMEdit format te vinden. Probeer eerst het 'dijk' model aangezien hier de consistentste resultaten uit komen."
38
+
39
+ iface = gr.Interface(
40
+ fn=annotate_file,
41
+ title="Dijkprofiel Annotator",
42
+ description=description,
43
+ inputs=[gr.inputs.File(file_count="multiple", type="file", label="te annoteren surfacelines files", optional=False), gr.inputs.Dropdown(['dijk', 'dijk+sloot', "volledig"], type="value", default=None, label='Model type')],
44
+ outputs=gr.outputs.File(label="gegenereerde file"))
45
+ iface.launch()
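For completeness, a minimal sketch (editor's illustration; paths are hypothetical) of calling the annotator directly, mirroring the per-file call made in annotate_file above:

import dijkprofile_annotator

dijkprofile_annotator.annotate("surfacelines.csv", "characteristicpoints_out.csv", device='cpu')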
dijkprofile-annotator/output/models/.gitkeep ADDED
File without changes
dijkprofile-annotator/output/models/scaler.pik ADDED
Binary file (611 Bytes). View file
 
dijkprofile-annotator/output/reports/.gitkeep ADDED
File without changes
dijkprofile-annotator/output/visualizations/.gitkeep ADDED
File without changes
dijkprofile-annotator/run/configs/.gitkeep ADDED
File without changes
dijkprofile-annotator/setup.cfg ADDED
@@ -0,0 +1,12 @@
1
+ [pycodestyle]
2
+ max-line-length = 120
3
+ exclude = .ipynb_checkpoints
4
+
5
+ [pep8]
6
+ max-line-length = 120
7
+ exclude = .ipynb_checkpoints
8
+
9
+ [flake8]
10
+ max-line-length = 120
11
+ exclude = .ipynb_checkpoints
12
+ max-complexity = 10
dijkprofile-annotator/setup.py ADDED
@@ -0,0 +1,21 @@
1
+ from setuptools import setup, find_packages
2
+
3
+ setup(name='dijkprofile_annotator',
4
+ version='0.1.0',
5
+ description='Automatically annotate dijkprofiles in qDAMEdit format',
6
+ long_description=open('README.md').read(),
7
+ url='',
8
+ author='Jonathan Gerbscheid',
9
+ author_email='j.gerbscheid@hetwaterschapshuis.nl',
10
+ license='MIT',
11
+ package_dir={"dijkprofile_annotator": "dijkprofile_annotator"},
12
+ packages=find_packages(),
13
+ zip_safe=False,
14
+ install_requires=["joblib",
15
+ "matplotlib",
16
+ "numpy",
17
+ "pillow",
18
+ "scikit_learn>=1.0.1",
19
+ "seaborn",
20
+ "torch>=1.9.0"]
21
+ )
requirements.txt ADDED
@@ -0,0 +1,8 @@
1
+ joblib==1.1.0
2
+ matplotlib==3.5.0
3
+ numpy==1.21.4
4
+ Pillow==8.4.0
5
+ scikit_learn==1.0.1
6
+ seaborn==0.11.2
7
+ torch==1.10.0
8
+ ./dijkprofile-annotator