Humboldt committed on
Commit
6ad6df4
·
1 Parent(s): 91a21c4

Upload 5 files

Browse files
Files changed (5) hide show
  1. dataset.py +144 -0
  2. models.py +357 -0
  3. requirements.txt +6 -0
  4. streamlit_trees.py +184 -0
  5. utils.py +135 -0
dataset.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from absl.flags import FLAGS
3
+
4
@tf.function
def transform_targets_for_output(y_true, grid_size, anchor_idxs):
    """Scatter ground-truth boxes into the target grid of one output scale.

    Args:
        y_true: tensor indexed as
            (N, boxes, (x1, y1, x2, y2, class, best_anchor)).
        grid_size: number of grid cells per side at this scale.
        anchor_idxs: anchor indices that belong to this scale.

    Returns:
        A tensor of shape (N, grid, grid, anchors, 6) whose last axis holds
        (x1, y1, x2, y2, obj, class); cells that receive no box stay zero.
    """
    batch = tf.shape(y_true)[0]
    anchor_idxs = tf.cast(anchor_idxs, tf.int32)

    targets = tf.zeros(
        (batch, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6))

    # Scatter coordinates and the values written at them, collected pairwise.
    cell_ids = tf.TensorArray(tf.int32, 1, dynamic_size=True)
    cell_vals = tf.TensorArray(tf.float32, 1, dynamic_size=True)
    count = 0
    for b in tf.range(batch):
        for k in tf.range(tf.shape(y_true)[1]):
            # Rows whose x2 is exactly zero are skipped (zero-padded rows).
            if tf.equal(y_true[b][k][2], 0):
                continue

            row = y_true[b][k]
            matches = tf.equal(anchor_idxs, tf.cast(row[5], tf.int32))

            # Only boxes whose best anchor belongs to this scale are written.
            if tf.reduce_any(matches):
                corners = row[0:4]
                centre = (row[0:2] + row[2:4]) / 2

                local_anchor = tf.cast(tf.where(matches), tf.int32)
                cell = tf.cast(centre // (1/grid_size), tf.int32)

                # grid[y][x][anchor] = (x1, y1, x2, y2, obj, class)
                cell_ids = cell_ids.write(
                    count, [b, cell[1], cell[0], local_anchor[0][0]])
                cell_vals = cell_vals.write(
                    count,
                    [corners[0], corners[1], corners[2], corners[3],
                     1, row[4]])
                count += 1

    return tf.tensor_scatter_nd_update(
        targets, cell_ids.stack(), cell_vals.stack())
44
+
45
+
46
def transform_targets(y_train, anchors, anchor_masks, size):
    """Build one target tensor per output scale from the labelled boxes.

    Each box is assigned the anchor with the highest width/height IoU; that
    anchor index is appended to the box as an extra last column and the result
    is handed to ``transform_targets_for_output`` once per anchor mask. The
    grid starts at ``size // 32`` cells and doubles for every further mask.

    Args:
        y_train: boxes whose last axis starts with (x1, y1, x2, y2, ...).
        anchors: anchor (width, height) pairs.
        anchor_masks: iterable of anchor-index groups, one per output scale.
        size: input image side length.

    Returns:
        Tuple with one target tensor per entry of ``anchor_masks``.
    """
    anchors = tf.cast(anchors, tf.float32)
    anchor_w, anchor_h = anchors[..., 0], anchors[..., 1]
    anchor_area = anchor_w * anchor_h

    # Repeat every box once per anchor so the comparison broadcasts.
    wh = y_train[..., 2:4] - y_train[..., 0:2]
    wh = tf.tile(tf.expand_dims(wh, -2),
                 (1, 1, tf.shape(anchors)[0], 1))
    box_w, box_h = wh[..., 0], wh[..., 1]
    box_area = box_w * box_h

    overlap = tf.minimum(box_w, anchor_w) * tf.minimum(box_h, anchor_h)
    iou = overlap / (box_area + anchor_area - overlap)
    best_anchor = tf.cast(tf.argmax(iou, axis=-1), tf.float32)

    y_train = tf.concat(
        [y_train, tf.expand_dims(best_anchor, axis=-1)], axis=-1)

    per_scale = []
    cells = size // 32
    for mask in anchor_masks:
        per_scale.append(
            transform_targets_for_output(y_train, cells, mask))
        cells *= 2

    return tuple(per_scale)
71
+
72
+
73
def transform_images(x_train, size):
    """Resize images to ``size`` x ``size`` and divide pixel values by 255."""
    resized = tf.image.resize(x_train, (size, size))
    return resized / 255
77
+
78
+
79
# Feature spec used to parse serialized examples. Field names follow
# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md#conversion-script-outline-conversion-script-outline
#
# Fields from that outline that this project does not need, and therefore
# does not parse:
#   image/width, image/height, image/filename, image/source_id,
#   image/key/sha256, image/format, image/object/class/label,
#   image/object/difficult, image/object/truncated, image/object/view
IMAGE_FEATURE_MAP = {
    'image/encoded': tf.io.FixedLenFeature([], tf.string),
    **{
        'image/object/bbox/' + edge: tf.io.VarLenFeature(tf.float32)
        for edge in ('xmin', 'ymin', 'xmax', 'ymax')
    },
    'image/object/class/text': tf.io.VarLenFeature(tf.string),
}
99
+
100
+
101
def parse_tfrecord(tfrecord, class_table, size):
    """Decode one serialized example into an image and a fixed-size box table.

    Args:
        tfrecord: serialized example matching ``IMAGE_FEATURE_MAP``.
        class_table: lookup table mapping class text to a numeric id.
        size: side length the decoded image is resized to.

    Returns:
        ``(x_train, y_train)`` where ``x_train`` is the resized image and
        ``y_train`` has ``FLAGS.yolo_max_boxes`` rows of
        (xmin, ymin, xmax, ymax, label); unused rows are zero.
    """
    x = tf.io.parse_single_example(tfrecord, IMAGE_FEATURE_MAP)
    x_train = tf.image.decode_jpeg(x['image/encoded'], channels=3)
    x_train = tf.image.resize(x_train, (size, size))

    class_text = tf.sparse.to_dense(
        x['image/object/class/text'], default_value='')
    labels = tf.cast(class_table.lookup(class_text), tf.float32)
    y_train = tf.stack([tf.sparse.to_dense(x['image/object/bbox/xmin']),
                        tf.sparse.to_dense(x['image/object/bbox/ymin']),
                        tf.sparse.to_dense(x['image/object/bbox/xmax']),
                        tf.sparse.to_dense(x['image/object/bbox/ymax']),
                        labels], axis=1)

    # An example with more boxes than the limit would make the padding
    # amount negative, which tf.pad rejects; keep only the first max_boxes.
    max_boxes = FLAGS.yolo_max_boxes
    y_train = y_train[:max_boxes]

    paddings = [[0, max_boxes - tf.shape(y_train)[0]], [0, 0]]
    y_train = tf.pad(y_train, paddings)

    return x_train, y_train
119
+
120
+
121
def load_tfrecord_dataset(file_pattern, class_file, size=416):
    """Create a dataset of parsed examples from TFRecord files.

    Args:
        file_pattern: pattern passed to ``tf.data.Dataset.list_files``.
        class_file: text file with one class name per line; a name's line
            number becomes its numeric id, unknown names map to -1.
        size: side length images are resized to (see ``parse_tfrecord``).

    Returns:
        A ``tf.data.Dataset`` yielding ``parse_tfrecord`` results.
    """
    # Key = column 0 of each "\n"-delimited line (i.e. the whole line),
    # value = the line number.
    class_table = tf.lookup.StaticHashTable(
        tf.lookup.TextFileInitializer(
            class_file, tf.string, 0,
            tf.int64, tf.lookup.TextFileIndex.LINE_NUMBER,
            delimiter="\n"),
        -1)

    files = tf.data.Dataset.list_files(file_pattern)
    dataset = files.flat_map(tf.data.TFRecordDataset)
    return dataset.map(lambda x: parse_tfrecord(x, class_table, size))
129
+
130
+
131
def load_fake_dataset():
    """Return a one-element dataset built from a bundled sample image.

    The image is read from ``./data/girl.png`` and paired with three
    hard-coded boxes (x1, y1, x2, y2, class) followed by five all-zero
    padding rows.
    """
    # Read through a context manager so the file handle is always closed.
    with open('./data/girl.png', 'rb') as image_file:
        encoded = image_file.read()
    x_train = tf.image.decode_jpeg(encoded, channels=3)
    x_train = tf.expand_dims(x_train, axis=0)

    labels = [
        [0.18494931, 0.03049111, 0.9435849, 0.96302897, 0],
        [0.01586703, 0.35938117, 0.17582396, 0.6069674, 56],
        [0.09158827, 0.48252046, 0.26967454, 0.6403017, 67]
    ] + [[0, 0, 0, 0, 0]] * 5
    y_train = tf.convert_to_tensor(labels, tf.float32)
    y_train = tf.expand_dims(y_train, axis=0)

    return tf.data.Dataset.from_tensor_slices((x_train, y_train))
models.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from absl import flags
2
+ from absl.flags import FLAGS
3
+ import numpy as np
4
+ import tensorflow as tf
5
+ from tensorflow.keras import Model
6
+ from tensorflow.keras.layers import (
7
+ Add,
8
+ Concatenate,
9
+ Conv2D,
10
+ Input,
11
+ Lambda,
12
+ LeakyReLU,
13
+ MaxPool2D,
14
+ UpSampling2D,
15
+ ZeroPadding2D,
16
+ BatchNormalization,
17
+ )
18
+ from tensorflow.keras.regularizers import l2
19
+ from tensorflow.keras.losses import (
20
+ binary_crossentropy,
21
+ sparse_categorical_crossentropy
22
+ )
23
+ from .utils import broadcast_iou
24
+
25
flags.DEFINE_integer(
    'yolo_max_boxes', 100, 'maximum number of boxes per image')


# The two threshold flags below are read by yolo_nms but are not defined in
# this module; the program that uses yolo_nms has to define them.
#flags.DEFINE_float('yolo_iou_threshold', 0.1, 'iou threshold')
#flags.DEFINE_float('yolo_score_threshold', 0.1, 'score threshold')


# Anchor (width, height) pairs, divided by 416.
yolo_anchors = np.array(
    [(10, 13), (16, 30), (33, 23),
     (30, 61), (62, 45), (59, 119),
     (116, 90), (156, 198), (373, 326)],
    np.float32) / 416
# Rows of yolo_anchors used by each of the three outputs.
yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])

yolo_tiny_anchors = np.array(
    [(10, 14), (23, 27), (37, 58),
     (81, 82), (135, 169), (344, 319)],
    np.float32) / 416
# Rows of yolo_tiny_anchors used by each of the two outputs.
yolo_tiny_anchor_masks = np.array([[3, 4, 5], [0, 1, 2]])
42
+
43
+
44
def DarknetConv(x, filters, size, strides=1, batch_norm=True):
    """Apply a convolution, optionally followed by batch norm and LeakyReLU.

    With ``strides == 1`` the convolution uses 'same' padding; otherwise the
    input is zero-padded by one pixel on the top and left and convolved with
    'valid' padding. When ``batch_norm`` is False the convolution carries a
    bias and its raw output is returned.
    """
    strided = strides != 1
    if strided:
        x = ZeroPadding2D(((1, 0), (1, 0)))(x)  # top left half-padding

    x = Conv2D(filters=filters,
               kernel_size=size,
               strides=strides,
               padding='valid' if strided else 'same',
               use_bias=not batch_norm,
               kernel_regularizer=l2(0.0005))(x)

    if not batch_norm:
        return x

    x = BatchNormalization()(x)
    return LeakyReLU(alpha=0.1)(x)
57
+
58
+
59
def DarknetResidual(x, filters):
    """Add ``x`` to the output of a 1x1 then 3x3 DarknetConv pair."""
    shortcut = x
    branch = DarknetConv(shortcut, filters // 2, 1)
    branch = DarknetConv(branch, filters, 3)
    return Add()([shortcut, branch])
65
+
66
+
67
def DarknetBlock(x, filters, blocks):
    """Apply a stride-2 DarknetConv followed by ``blocks`` residual units."""
    out = DarknetConv(x, filters, 3, strides=2)
    remaining = blocks
    while remaining > 0:
        out = DarknetResidual(out, filters)
        remaining -= 1
    return out
72
+
73
+
74
def Darknet(name=None):
    """Build the backbone model.

    Returns:
        A Keras model mapping a 3-channel image of any spatial size to the
        outputs of the 256-, 512- and 1024-filter stages, in that order.
    """
    inputs = Input([None, None, 3])
    net = DarknetConv(inputs, 32, 3)
    net = DarknetBlock(net, 64, 1)
    net = DarknetBlock(net, 128, 2)
    x_36 = DarknetBlock(net, 256, 8)   # returned for the skip connection
    x_61 = DarknetBlock(x_36, 512, 8)  # returned for the skip connection
    top = DarknetBlock(x_61, 1024, 4)
    return tf.keras.Model(inputs, (x_36, x_61, top), name=name)
83
+
84
+
85
def DarknetTiny(name=None):
    """Build the reduced backbone model.

    Returns:
        A Keras model mapping a 3-channel image of any spatial size to the
        output of the 256-filter stage and the final 1024-filter stage.
    """
    inputs = Input([None, None, 3])
    net = inputs
    # Four conv + 2x down-sampling stages with doubling filter counts.
    for width in (16, 32, 64, 128):
        net = DarknetConv(net, width, 3)
        net = MaxPool2D(2, 2, 'same')(net)

    x_8 = DarknetConv(net, 256, 3)  # returned for the skip connection
    net = MaxPool2D(2, 2, 'same')(x_8)
    net = DarknetConv(net, 512, 3)
    net = MaxPool2D(2, 1, 'same')(net)  # stride 1: resolution is kept
    net = DarknetConv(net, 1024, 3)
    return tf.keras.Model(inputs, (x_8, net), name=name)
101
+
102
+
103
def YoloConv(filters, name=None):
    """Return a callable that wraps the neck convolutions in a named model.

    The callable accepts either a single tensor or a ``(x, x_skip)`` tuple.
    For a tuple, ``x`` goes through a 1x1 conv, is up-sampled by 2 and
    concatenated with ``x_skip`` before the five alternating 1x1 / 3x3
    convolutions are applied.
    """
    def yolo_conv(x_in):
        if isinstance(x_in, tuple):
            inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
            feat, skip = inputs

            # merge with the skip connection
            feat = DarknetConv(feat, filters, 1)
            feat = UpSampling2D(2)(feat)
            feat = Concatenate()([feat, skip])
        else:
            feat = inputs = Input(x_in.shape[1:])

        stack = ((filters, 1), (filters * 2, 3),
                 (filters, 1), (filters * 2, 3),
                 (filters, 1))
        for width, kernel in stack:
            feat = DarknetConv(feat, width, kernel)
        return Model(inputs, feat, name=name)(x_in)
    return yolo_conv
123
+
124
+
125
def YoloConvTiny(filters, name=None):
    """Return a callable that wraps the tiny-variant neck in a named model.

    The callable accepts either a single tensor, which gets one 1x1 conv, or
    a ``(x, x_skip)`` tuple, where ``x`` gets a 1x1 conv, is up-sampled by 2
    and concatenated with ``x_skip``.
    """
    def yolo_conv(x_in):
        if not isinstance(x_in, tuple):
            inputs = Input(x_in.shape[1:])
            feat = DarknetConv(inputs, filters, 1)
        else:
            inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
            feat, skip = inputs

            # merge with the skip connection
            feat = DarknetConv(feat, filters, 1)
            feat = UpSampling2D(2)(feat)
            feat = Concatenate()([feat, skip])

        return Model(inputs, feat, name=name)(x_in)
    return yolo_conv
141
+
142
+
143
def YoloOutput(filters, anchors, classes, name=None):
    """Return a callable that wraps one detection head in a named model.

    The head is a 3x3 conv followed by a 1x1 conv without batch norm that
    produces ``anchors * (classes + 5)`` channels, reshaped to
    (batch, rows, cols, anchors, classes + 5).
    """
    def yolo_output(x_in):
        inputs = Input(x_in.shape[1:])
        head = DarknetConv(inputs, filters * 2, 3)
        head = DarknetConv(head, anchors * (classes + 5), 1,
                           batch_norm=False)
        head = Lambda(
            lambda t: tf.reshape(
                t, (-1, tf.shape(t)[1], tf.shape(t)[2],
                    anchors, classes + 5)))(head)
        return tf.keras.Model(inputs, head, name=name)(x_in)
    return yolo_output
152
+
153
+
154
# tf.meshgrid relies on tf.size, which TensorFlow Lite does not support, so
# this is a small replacement built from basic ops only.
def _meshgrid(n_a, n_b):
    """Return two (n_b, n_a) index grids.

    The first grid holds ``0..n_a-1`` along each row; the second holds the
    row number ``0..n_b-1`` repeated across each row.
    """
    shape = (n_b, n_a)
    along_a = tf.reshape(tf.tile(tf.range(n_a), [n_b]), shape)
    along_b = tf.reshape(tf.repeat(tf.range(n_b), n_a), shape)
    return [along_a, along_b]
162
+
163
+
164
def yolo_boxes(pred, anchors, classes):
    """Decode one raw output tensor into boxes and probabilities.

    Args:
        pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...classes)).
        anchors: anchor sizes multiplied into the exponentiated w/h.
        classes: number of class channels.

    Returns:
        ``(bbox, objectness, class_probs, pred_box)`` where ``bbox`` is
        (x1, y1, x2, y2) and ``pred_box`` keeps the sigmoid xy offsets
        together with the raw wh values, as needed by the loss.
    """
    grid_hw = tf.shape(pred)[1:3]
    raw_xy, raw_wh, raw_obj, raw_cls = tf.split(
        pred, (2, 2, 1, classes), axis=-1)

    xy_offset = tf.sigmoid(raw_xy)
    objectness = tf.sigmoid(raw_obj)
    class_probs = tf.sigmoid(raw_cls)
    pred_box = tf.concat((xy_offset, raw_wh), axis=-1)  # xywh for the loss

    # Cell coordinates; the last axis is ordered (x, y).
    cells = tf.stack(_meshgrid(grid_hw[1], grid_hw[0]), axis=-1)
    cells = tf.expand_dims(cells, axis=2)  # [gx, gy, 1, 2]

    # NOTE(review): xy is divided element-wise by the (rows, cols) pair —
    # equivalent only for square grids; confirm if non-square inputs matter.
    centre = (xy_offset + tf.cast(cells, tf.float32)) / \
        tf.cast(grid_hw, tf.float32)
    extent = tf.exp(raw_wh) * anchors

    top_left = centre - extent / 2
    bottom_right = centre + extent / 2
    bbox = tf.concat([top_left, bottom_right], axis=-1)

    return bbox, objectness, class_probs, pred_box
188
+
189
+
190
def yolo_nms(outputs, anchors, masks, classes):
    """Merge the per-scale predictions and run soft non-max suppression.

    Args:
        outputs: iterable of per-scale ``(boxes, confidence, class_probs)``
            triples as produced by ``yolo_boxes``.
        anchors: unused here; kept for interface compatibility.
        masks: unused here; kept for interface compatibility.
        classes: number of classes; with a single class the class
            probability is not multiplied into the score.

    Returns:
        ``(boxes, scores, classes, valid_detections)``, each with a leading
        axis of length 1. The first three are zero-padded to
        ``FLAGS.yolo_max_boxes`` entries; ``valid_detections`` is the number
        of entries selected by NMS.

    Note:
        The batch axis is squeezed, so only a batch size of 1 is supported.
        ``FLAGS.yolo_iou_threshold`` and ``FLAGS.yolo_score_threshold`` must
        have been defined by the calling program.
    """
    def _flatten(t):
        # (batch, ..., k) -> (batch, -1, k)
        return tf.reshape(t, (tf.shape(t)[0], -1, tf.shape(t)[-1]))

    bbox = tf.concat([_flatten(o[0]) for o in outputs], axis=1)
    confidence = tf.concat([_flatten(o[1]) for o in outputs], axis=1)
    class_probs = tf.concat([_flatten(o[2]) for o in outputs], axis=1)

    # If we only have one class, do not multiply by class_prob (always 0.5)
    if classes == 1:
        scores = confidence
    else:
        scores = confidence * class_probs

    dscores = tf.squeeze(scores, axis=0)
    scores = tf.reduce_max(dscores, [1])
    bbox = tf.reshape(bbox, (-1, 4))
    # Separate name so the integer ``classes`` argument is not overwritten.
    class_ids = tf.argmax(dscores, 1)

    selected_indices, selected_scores = \
        tf.image.non_max_suppression_with_scores(
            boxes=bbox,
            scores=scores,
            max_output_size=FLAGS.yolo_max_boxes,
            iou_threshold=FLAGS.yolo_iou_threshold,
            score_threshold=FLAGS.yolo_score_threshold,
            soft_nms_sigma=0.5
        )

    num_valid_nms_boxes = tf.shape(selected_indices)[0]

    # Pad indices and scores with zeros up to the fixed output length.
    pad = FLAGS.yolo_max_boxes - num_valid_nms_boxes
    selected_indices = tf.concat(
        [selected_indices, tf.zeros(pad, tf.int32)], 0)
    selected_scores = tf.concat(
        [selected_scores, tf.zeros(pad, tf.float32)], -1)

    boxes = tf.expand_dims(tf.gather(bbox, selected_indices), axis=0)
    scores = tf.expand_dims(selected_scores, axis=0)
    class_ids = tf.expand_dims(
        tf.gather(class_ids, selected_indices), axis=0)
    valid_detections = tf.expand_dims(num_valid_nms_boxes, axis=0)

    return boxes, scores, class_ids, valid_detections
242
+
243
+
244
def YoloV3(size=None, channels=3, anchors=yolo_anchors,
           masks=yolo_anchor_masks, classes=80, training=False):
    """Assemble the three-output model.

    With ``training=True`` the model returns the three raw head outputs;
    otherwise each output is decoded by ``yolo_boxes`` and the results are
    combined by ``yolo_nms``.
    """
    inputs = Input([size, size, channels], name='input')

    x_36, x_61, feat = Darknet(name='yolo_darknet')(inputs)

    feat = YoloConv(512, name='yolo_conv_0')(feat)
    output_0 = YoloOutput(
        512, len(masks[0]), classes, name='yolo_output_0')(feat)

    feat = YoloConv(256, name='yolo_conv_1')((feat, x_61))
    output_1 = YoloOutput(
        256, len(masks[1]), classes, name='yolo_output_1')(feat)

    feat = YoloConv(128, name='yolo_conv_2')((feat, x_36))
    output_2 = YoloOutput(
        128, len(masks[2]), classes, name='yolo_output_2')(feat)

    if training:
        return Model(inputs, (output_0, output_1, output_2), name='yolov3')

    boxes_0 = Lambda(
        lambda t: yolo_boxes(t, anchors[masks[0]], classes),
        name='yolo_boxes_0')(output_0)
    boxes_1 = Lambda(
        lambda t: yolo_boxes(t, anchors[masks[1]], classes),
        name='yolo_boxes_1')(output_1)
    boxes_2 = Lambda(
        lambda t: yolo_boxes(t, anchors[masks[2]], classes),
        name='yolo_boxes_2')(output_2)

    # Only (bbox, objectness, class_probs) of each scale go into NMS.
    detections = Lambda(
        lambda t: yolo_nms(t, anchors, masks, classes),
        name='yolo_nms')((boxes_0[:3], boxes_1[:3], boxes_2[:3]))

    return Model(inputs, detections, name='yolov3')
273
+
274
+
275
def YoloV3Tiny(size=None, channels=3, anchors=yolo_tiny_anchors,
               masks=yolo_tiny_anchor_masks, classes=80, training=False):
    """Assemble the two-output tiny model.

    With ``training=True`` the model (named 'yolov3') returns the two raw
    head outputs; otherwise each output is decoded by ``yolo_boxes`` and the
    results are combined by ``yolo_nms`` in a model named 'yolov3_tiny'.
    """
    inputs = Input([size, size, channels], name='input')

    x_8, feat = DarknetTiny(name='yolo_darknet')(inputs)

    feat = YoloConvTiny(256, name='yolo_conv_0')(feat)
    output_0 = YoloOutput(
        256, len(masks[0]), classes, name='yolo_output_0')(feat)

    feat = YoloConvTiny(128, name='yolo_conv_1')((feat, x_8))
    output_1 = YoloOutput(
        128, len(masks[1]), classes, name='yolo_output_1')(feat)

    if training:
        return Model(inputs, (output_0, output_1), name='yolov3')

    boxes_0 = Lambda(
        lambda t: yolo_boxes(t, anchors[masks[0]], classes),
        name='yolo_boxes_0')(output_0)
    boxes_1 = Lambda(
        lambda t: yolo_boxes(t, anchors[masks[1]], classes),
        name='yolo_boxes_1')(output_1)

    # Only (bbox, objectness, class_probs) of each scale go into NMS.
    detections = Lambda(
        lambda t: yolo_nms(t, anchors, masks, classes),
        name='yolo_nms')((boxes_0[:3], boxes_1[:3]))
    return Model(inputs, detections, name='yolov3_tiny')
297
+
298
+
299
def YoloLoss(anchors, classes=80, ignore_thresh=0.5):
    """Create the loss function for one output scale.

    Args:
        anchors: anchor sizes of this scale.
        classes: number of classes.
        ignore_thresh: predictions in empty cells whose best IoU with any
            true box reaches this value are excluded from the objectness
            loss.

    Returns:
        ``yolo_loss(y_true, y_pred)`` returning one loss value per sample.
    """
    def yolo_loss(y_true, y_pred):
        # 1. decode predictions
        # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
        pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(
            y_pred, anchors, classes)
        pred_xy, pred_wh = pred_xywh[..., 0:2], pred_xywh[..., 2:4]

        # 2. split targets
        # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls))
        true_box, true_obj, true_class_idx = tf.split(
            y_true, (4, 1, 1), axis=-1)
        top_left, bottom_right = true_box[..., 0:2], true_box[..., 2:4]
        true_xy = (top_left + bottom_right) / 2
        true_wh = bottom_right - top_left

        # small boxes get a weight closer to 2, large ones closer to 1
        box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]

        # 3. express the targets in the raw-prediction parameterisation
        grid_size = tf.shape(y_true)[1]
        grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size))
        grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)
        true_xy = true_xy * tf.cast(grid_size, tf.float32) - \
            tf.cast(grid, tf.float32)
        true_wh = tf.math.log(true_wh / anchors)
        # log(0) from empty cells becomes 0 instead of -inf
        true_wh = tf.where(tf.math.is_inf(true_wh),
                           tf.zeros_like(true_wh), true_wh)

        # 4. masks
        obj_mask = tf.squeeze(true_obj, -1)

        def _best_iou(sample):
            # Best IoU of every predicted box against the sample's true boxes.
            boxes_pred, boxes_true, mask = sample
            labelled = tf.boolean_mask(boxes_true, tf.cast(mask, tf.bool))
            return tf.reduce_max(
                broadcast_iou(boxes_pred, labelled), axis=-1)

        best_iou = tf.map_fn(
            _best_iou,
            (pred_box, true_box, obj_mask),
            tf.float32)
        ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)

        # 5. per-cell losses
        xy_sq_err = tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1)
        wh_sq_err = tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1)
        xy_loss = obj_mask * box_loss_scale * xy_sq_err
        wh_loss = obj_mask * box_loss_scale * wh_sq_err
        obj_loss = binary_crossentropy(true_obj, pred_obj)
        obj_loss = obj_mask * obj_loss + \
            (1 - obj_mask) * ignore_mask * obj_loss
        # TODO: use binary_crossentropy instead
        class_loss = obj_mask * sparse_categorical_crossentropy(
            true_class_idx, pred_class)

        # 6. sum over (gridx, gridy, anchors) => one value per sample
        cell_axes = (1, 2, 3)
        xy_loss = tf.reduce_sum(xy_loss, axis=cell_axes)
        wh_loss = tf.reduce_sum(wh_loss, axis=cell_axes)
        obj_loss = tf.reduce_sum(obj_loss, axis=cell_axes)
        class_loss = tf.reduce_sum(class_loss, axis=cell_axes)

        return xy_loss + wh_loss + obj_loss + class_loss
    return yolo_loss
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ tensorflow==2.10.*
2
+ opencv-python==4.2.0.32
3
+ lxml
4
+ geotiff==0.2.7
5
+
6
+ -e .
streamlit_trees.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Tue Jul 13 16:58:28 2021
4
+
5
+ @author: Alfa
6
+ """
7
+
8
+ import time
9
+ import streamlit as st
10
+ import tensorflow as tf
11
+ from tensorflow.keras.preprocessing.image import load_img
12
+ from tensorflow.keras.preprocessing.image import img_to_array
13
+ import numpy as np
14
+ import pandas as pd
15
+
16
+
17
+ from absl import app, flags, logging
18
+ from absl.flags import FLAGS
19
+ import cv2
20
+
21
+ from yolov3_tf2.models import (YoloV3, YoloV3Tiny)
22
+ from yolov3_tf2.dataset import transform_images
23
+ from yolov3_tf2.utils import draw_outputs
24
+ import matplotlib.pyplot as plt
25
+
26
+ from geotiff import GeoTiff
27
+
28
+ import os
29
+ import sys
30
+
31
+
32
# Streamlit page: upload a GeoTIFF, run the YOLOv3 detector on 416-pixel
# tiles and offer the detections (WGS 84 position, class, score) as a table
# and as a CSV download.

# NOTE(review): machine-specific absolute path — the page fails to load on
# any machine without this file; consider shipping the image with the app.
image = load_img(r'C:\Users\alfa\Desktop\Python\Baum Projekt Labels\trees.jpg')
image = img_to_array(image).astype('float')/255

st.image(image)

c1, c2, c3 = st.columns([0.2, 0.6, 0.2])

file = c2.file_uploader(label='Upload GeoTIFF file')

my_bar = c2.progress(0)

if file is not None:

    geo_tiff = GeoTiff(file)

    # Raster extent in pixels: i along x, j along y.
    i = geo_tiff.tif_shape[1]
    j = geo_tiff.tif_shape[0]

    # WGS 84 coordinates of pixel (0, 0) and of pixel (i, j).
    corner_origin = geo_tiff.get_wgs_84_coords(0, 0)
    corner_far = geo_tiff.get_wgs_84_coords(i, j)

    # Degrees per pixel; the y step is taken with the opposite sign.
    deg_pixel_x = (corner_far[0] - corner_origin[0]) / i
    deg_pixel_y = (corner_far[1] - corner_origin[1]) / -j

    start_x = corner_origin[0]
    start_y = corner_far[1]

    size = 416

    # Define the flags before parsing so they can actually be set from the
    # command line; the membership guard keeps a re-execution of this script
    # in the same process from defining them a second time.
    if 'weights' not in FLAGS:
        flags.DEFINE_string('classes', './data/trees_simple.names',
                            'path to classes file')
        flags.DEFINE_string('weights', './checkpoints/trees_all.tf',
                            'path to weights file')
        flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
        flags.DEFINE_integer('size', 416, 'resize images to')
        # The three video flags are not used by this page; they are kept so
        # existing command lines that pass them keep working.
        flags.DEFINE_string('video', './data/video.mp4',
                            'path to video file or number for webcam)')
        flags.DEFINE_string('output', './data/video2.mp4',
                            'path to output video')
        flags.DEFINE_string(
            'output_format', 'XVID',
            'codec used in VideoWriter when saving video to file')
        flags.DEFINE_integer('num_classes', 5,
                             'number of classes in the model')

        flags.DEFINE_float('yolo_iou_threshold', 0.5, 'iou threshold')
        flags.DEFINE_float('yolo_score_threshold', 0.15, 'score threshold')
    FLAGS(sys.argv)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    with open(FLAGS.classes) as names_file:
        class_names = [c.strip() for c in names_file.readlines()]
    logging.info('classes loaded')

    times = []

    images = []
    bboxes_x_found = []
    bboxes_y_found = []
    classes_found = []
    scores_found = []

    tiles_x = int(i/size)
    tiles_y = int(j/size)
    for m in range(tiles_x):

        my_bar.progress(int(m/tiles_x*100))

        for n in range(tiles_y):

            # Geographic corners of tile (m, n).
            tile_x = start_x+m*deg_pixel_x*size
            tile_y = start_y+n*deg_pixel_y*size
            area_box = [(tile_x, tile_y),
                        (tile_x+size*deg_pixel_x, tile_y+size*deg_pixel_y)]

            img = geo_tiff.read_box(area_box.copy())

            # Only the first three channels are fed to the model.
            img_in = tf.expand_dims(img[:, :, :3], 0)
            img_in = transform_images(img_in, FLAGS.size)

            t1 = time.time()
            boxes, scores, classes, nums = yolo.predict(img_in, verbose=False)

            if nums[0] > 0:
                t2 = time.time()
                times.append(t2-t1)
                times = times[-20:]
                img = draw_outputs(
                    img, (boxes, scores, classes, nums), class_names)
                img = cv2.putText(
                    img,
                    "Time: {:.2f}ms".format(sum(times)/len(times)*1000),
                    (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

                images.append(img)

                for ind in range(nums[0]):
                    classes_found.append(class_names[int(classes[0][ind])])
                    scores_found.append(float(scores[0][ind]))
                    # NOTE(review): uses the box's (x1, y1) corner, not its
                    # centre, and assumes tile y grows with deg_pixel_y —
                    # confirm against the orientation of read_box.
                    bboxes_x_found.append(float(
                        boxes[0][ind][0]*deg_pixel_x*size
                        + start_x+m*deg_pixel_x*size))
                    bboxes_y_found.append(float(
                        boxes[0][ind][1]*deg_pixel_y*size
                        + start_y+n*deg_pixel_y*size))

    my_bar.progress(100)

    print()
    print(classes_found)
    print()
    print(bboxes_x_found)
    print()
    print(bboxes_y_found)

    # One row per detection. Each column is filled from the list that holds
    # that quantity, so the header always matches the data beneath it.
    z2 = pd.DataFrame({
        'Longitude': bboxes_x_found,
        'Lattitude': bboxes_y_found,
        'Class': classes_found,
        'Certainty': scores_found,
    })

    z3 = z2.to_csv()
    st.download_button('Download *.csv file', z3)

    st.dataframe(data=z2)
utils.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from absl import logging
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ import cv2
5
+
6
# Sub-model names, in the order their weights are read from the file.
YOLOV3_LAYER_LIST = [
    'yolo_darknet',
    'yolo_conv_0',
    'yolo_output_0',
    'yolo_conv_1',
    'yolo_output_1',
    'yolo_conv_2',
    'yolo_output_2',
]

YOLOV3_TINY_LAYER_LIST = [
    'yolo_darknet',
    'yolo_conv_0',
    'yolo_output_0',
    'yolo_conv_1',
    'yolo_output_1',
]


def load_darknet_weights(model, weights_file, tiny=False):
    """Copy weights from a Darknet ``.weights`` file into ``model``.

    The file is read sequentially: a header of five int32 values, then for
    every layer whose name starts with 'conv2d' (visited per sub-model in
    the order of the layer list) either the bias or the four batch-norm
    vectors, followed by the convolution kernel.

    Args:
        model: object whose ``get_layer(name)`` returns the named sub-models.
        weights_file: path of the Darknet weights file.
        tiny: select the tiny layer list instead of the full one.

    Raises:
        AssertionError: if data is left in the file after all layers were
            filled.
    """
    layers = YOLOV3_TINY_LAYER_LIST if tiny else YOLOV3_LAYER_LIST

    # The context manager closes the file even when a read or reshape fails.
    with open(weights_file, 'rb') as wf:
        # Header values are not used further; reading them advances the file.
        major, minor, revision, seen, _ = np.fromfile(
            wf, dtype=np.int32, count=5)

        for layer_name in layers:
            sub_model = model.get_layer(layer_name)
            for i, layer in enumerate(sub_model.layers):
                if not layer.name.startswith('conv2d'):
                    continue

                # A conv directly followed by batch norm carries no bias.
                batch_norm = None
                if i + 1 < len(sub_model.layers) and \
                        sub_model.layers[i + 1].name.startswith('batch_norm'):
                    batch_norm = sub_model.layers[i + 1]

                logging.info("{}/{} {}".format(
                    sub_model.name, layer.name,
                    'bn' if batch_norm else 'bias'))

                filters = layer.filters
                size = layer.kernel_size[0]
                in_dim = layer.get_input_shape_at(0)[-1]

                if batch_norm is None:
                    conv_bias = np.fromfile(
                        wf, dtype=np.float32, count=filters)
                else:
                    # darknet [beta, gamma, mean, variance]
                    bn_weights = np.fromfile(
                        wf, dtype=np.float32, count=4 * filters)
                    # tf [gamma, beta, mean, variance]
                    bn_weights = bn_weights.reshape(
                        (4, filters))[[1, 0, 2, 3]]

                # darknet shape (out_dim, in_dim, height, width)
                conv_shape = (filters, in_dim, size, size)
                # np.prod instead of np.product, which NumPy 2.0 removed.
                conv_weights = np.fromfile(
                    wf, dtype=np.float32, count=int(np.prod(conv_shape)))
                # tf shape (height, width, in_dim, out_dim)
                conv_weights = conv_weights.reshape(
                    conv_shape).transpose([2, 3, 1, 0])

                if batch_norm is None:
                    layer.set_weights([conv_weights, conv_bias])
                else:
                    layer.set_weights([conv_weights])
                    batch_norm.set_weights(bn_weights)

        # Raised explicitly so the check also runs under ``python -O``.
        if len(wf.read()) != 0:
            raise AssertionError('failed to read all data')
76
+
77
+
78
def broadcast_iou(box_1, box_2):
    """Compute the IoU of every box in ``box_1`` with every box in ``box_2``.

    Args:
        box_1: (..., (x1, y1, x2, y2))
        box_2: (N, (x1, y1, x2, y2))

    Returns:
        Tensor of shape (..., N) with the pairwise IoU values.
    """
    # Give both inputs the common shape (..., N, 4).
    box_1 = tf.expand_dims(box_1, -2)
    box_2 = tf.expand_dims(box_2, 0)
    common = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2))
    box_1 = tf.broadcast_to(box_1, common)
    box_2 = tf.broadcast_to(box_2, common)

    a_x1, a_y1, a_x2, a_y2 = (box_1[..., k] for k in range(4))
    b_x1, b_y1, b_x2, b_y2 = (box_2[..., k] for k in range(4))

    # Overlap extents are clamped at zero for disjoint boxes.
    overlap_w = tf.maximum(
        tf.minimum(a_x2, b_x2) - tf.maximum(a_x1, b_x1), 0)
    overlap_h = tf.maximum(
        tf.minimum(a_y2, b_y2) - tf.maximum(a_y1, b_y1), 0)
    overlap = overlap_w * overlap_h

    area_a = (a_x2 - a_x1) * (a_y2 - a_y1)
    area_b = (b_x2 - b_x1) * (b_y2 - b_y1)
    return overlap / (area_a + area_b - overlap)
100
+
101
+
102
def draw_outputs(img, outputs, class_names):
    """Draw the detections of the first batch element onto ``img``.

    Args:
        img: image array; its first two shape entries are used to scale the
            box coordinates to pixels.
        outputs: ``(boxes, objectness, classes, nums)``; only index 0 of
            each entry is used.
        class_names: list mapping class ids to display names.

    Returns:
        The image with one rectangle and one "<name> <score>" label per
        detection.
    """
    boxes, objectness, classes, nums = (part[0] for part in outputs)
    # (width, height), to match the (x, y) order of the box corners.
    scale = np.flip(img.shape[0:2])
    for k in range(nums):
        corner_a = tuple((np.array(boxes[k][0:2]) * scale).astype(np.int32))
        corner_b = tuple((np.array(boxes[k][2:4]) * scale).astype(np.int32))
        caption = '{} {:.4f}'.format(
            class_names[int(classes[k])], objectness[k])
        img = cv2.rectangle(img, corner_a, corner_b, (255, 0, 0), 2)
        img = cv2.putText(img, caption, corner_a,
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
    return img
114
+
115
+
116
def draw_labels(x, y, class_names):
    """Draw labelled boxes onto an image.

    Args:
        x: image tensor; converted with ``.numpy()``.
        y: boxes whose last axis is (x1, y1, x2, y2, class).
        class_names: list mapping class ids to display names.

    Returns:
        The image array with one rectangle and class name per row of ``y``.
    """
    img = x.numpy()
    boxes, classes = tf.split(y, (4, 1), axis=-1)
    boxes = np.array(boxes)
    classes = np.array(classes[..., 0])
    # (width, height), to match the (x, y) order of the box corners.
    wh = np.flip(img.shape[0:2])
    for box, class_id in zip(boxes, classes):
        x1y1 = tuple((box[0:2] * wh).astype(np.int32))
        x2y2 = tuple((box[2:4] * wh).astype(np.int32))
        img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2)
        # The class id is stored as a float alongside the coordinates; a
        # list can only be indexed with an integer (as in draw_outputs).
        img = cv2.putText(img, class_names[int(class_id)],
                          x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL,
                          1, (0, 0, 255), 2)
    return img
129
+
130
+
131
def freeze_all(model, frozen=True):
    """Set ``trainable`` to ``not frozen`` on ``model`` and, recursively, on
    the layers of every nested Keras model."""
    model.trainable = not frozen
    if not isinstance(model, tf.keras.Model):
        return
    for child in model.layers:
        freeze_all(child, frozen)