alfred8995 committed on
Commit
19ab9f7
·
verified ·
1 Parent(s): 48056a3

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +1 -0
  2. chute_config.yml +19 -0
  3. miner.py +599 -0
  4. weights.onnx +3 -0
README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ **
chute_config.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Image:
2
+ from_base: parachutes/python:3.12
3
+ run_command:
4
+ - pip install --upgrade setuptools wheel
5
+ - pip install 'numpy>=1.23' 'onnxruntime-gpu[cuda,cudnn]>=1.16' 'opencv-python>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
6
+ - pip install torch torchvision
7
+
8
+ NodeSelector:
9
+ gpu_count: 1
10
+ min_vram_gb_per_gpu: 16
11
+ max_hourly_price_per_gpu: 1
12
+
13
+
14
+ Chute:
15
+ timeout_seconds: 900
16
+ concurrency: 4
17
+ max_instances: 5
18
+ scaling_threshold: 0.5
19
+ shutdown_after_seconds: 288000
miner.py ADDED
@@ -0,0 +1,599 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import math
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import onnxruntime as ort
7
+ from numpy import ndarray
8
+ from pydantic import BaseModel
9
+
10
+
11
+ class BoundingBox(BaseModel):
12
+ x1: int
13
+ y1: int
14
+ x2: int
15
+ y2: int
16
+ cls_id: int
17
+ conf: float
18
+
19
+
20
+ class TVFrameResult(BaseModel):
21
+ frame_id: int
22
+ boxes: list[BoundingBox]
23
+ keypoints: list[tuple[int, int]]
24
+
25
+
26
+ class Miner:
27
+ def __init__(self, path_hf_repo: Path) -> None:
28
+ model_path = path_hf_repo / "weights.onnx"
29
+ self.model_path = model_path
30
+ self.class_names = ["bus", "car", "truck", "motorcycle"]
31
+ model_class_order = ["car", "bus", "truck", "motorcycle"]
32
+ self.cls_remap = np.array(
33
+ [self.class_names.index(n) for n in model_class_order], dtype=np.int32
34
+ )
35
+
36
+ try:
37
+ ort.preload_dlls()
38
+ except Exception:
39
+ pass
40
+
41
+ sess_options = ort.SessionOptions()
42
+ sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
43
+
44
+ try:
45
+ self.session = ort.InferenceSession(
46
+ str(model_path),
47
+ sess_options=sess_options,
48
+ providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
49
+ )
50
+ except Exception:
51
+ self.session = ort.InferenceSession(
52
+ str(model_path),
53
+ sess_options=sess_options,
54
+ providers=["CPUExecutionProvider"],
55
+ )
56
+
57
+ self.input_name = self.session.get_inputs()[0].name
58
+ self.output_names = [output.name for output in self.session.get_outputs()]
59
+ self.input_shape = self.session.get_inputs()[0].shape
60
+
61
+ self.input_height = self._safe_dim(self.input_shape[2], default=1280)
62
+ self.input_width = self._safe_dim(self.input_shape[3], default=1280)
63
+
64
+ self.conf_thres = 0.14
65
+ self.iou_thres = 0.5
66
+ self.max_det = 150
67
+ self.conf_high = 0.48
68
+ self.tta_match_iou = 0.57
69
+ self.conf_adapt_low = 0.157
70
+ self.conf_adapt_high = 0.605
71
+ self.count_low = 8
72
+ self.count_high = 23
73
+ self.use_tta = True
74
+ self.min_box_area = 14 * 14
75
+ self.min_w = 8
76
+ self.min_h = 8
77
+ self.max_aspect_ratio = 6.5
78
+ self.max_box_area_ratio = 0.8
79
+
80
+ def __repr__(self) -> str:
81
+ return (
82
+ f"ONNXRuntime(session={type(self.session).__name__}, "
83
+ f"providers={self.session.get_providers()})"
84
+ )
85
+
86
+ @staticmethod
87
+ def _safe_dim(value, default: int) -> int:
88
+ return value if isinstance(value, int) and value > 0 else default
89
+
90
+ def _letterbox(
91
+ self,
92
+ image: ndarray,
93
+ new_shape: tuple[int, int],
94
+ color=(114, 114, 114),
95
+ ) -> tuple[ndarray, float, tuple[float, float]]:
96
+ h, w = image.shape[:2]
97
+ new_w, new_h = new_shape
98
+
99
+ ratio = min(new_w / w, new_h / h)
100
+ resized_w = int(round(w * ratio))
101
+ resized_h = int(round(h * ratio))
102
+
103
+ if (resized_w, resized_h) != (w, h):
104
+ interp = cv2.INTER_CUBIC if ratio > 1.0 else cv2.INTER_LINEAR
105
+ image = cv2.resize(image, (resized_w, resized_h), interpolation=interp)
106
+
107
+ dw = new_w - resized_w
108
+ dh = new_h - resized_h
109
+ dw /= 2.0
110
+ dh /= 2.0
111
+
112
+ left = int(round(dw - 0.1))
113
+ right = int(round(dw + 0.1))
114
+ top = int(round(dh - 0.1))
115
+ bottom = int(round(dh + 0.1))
116
+
117
+ padded = cv2.copyMakeBorder(
118
+ image,
119
+ top,
120
+ bottom,
121
+ left,
122
+ right,
123
+ borderType=cv2.BORDER_CONSTANT,
124
+ value=color,
125
+ )
126
+ return padded, ratio, (dw, dh)
127
+
128
+ def _preprocess(
129
+ self, image: ndarray
130
+ ) -> tuple[np.ndarray, float, tuple[float, float], tuple[int, int]]:
131
+ orig_h, orig_w = image.shape[:2]
132
+
133
+ img, ratio, pad = self._letterbox(
134
+ image, (self.input_width, self.input_height)
135
+ )
136
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
137
+ img = img.astype(np.float32) / 255.0
138
+ img = np.transpose(img, (2, 0, 1))[None, ...]
139
+ img = np.ascontiguousarray(img, dtype=np.float32)
140
+
141
+ return img, ratio, pad, (orig_w, orig_h)
142
+
143
+ @staticmethod
144
+ def _clip_boxes(boxes: np.ndarray, image_size: tuple[int, int]) -> np.ndarray:
145
+ w, h = image_size
146
+ boxes[:, 0] = np.clip(boxes[:, 0], 0, w - 1)
147
+ boxes[:, 1] = np.clip(boxes[:, 1], 0, h - 1)
148
+ boxes[:, 2] = np.clip(boxes[:, 2], 0, w - 1)
149
+ boxes[:, 3] = np.clip(boxes[:, 3], 0, h - 1)
150
+ return boxes
151
+
152
+ @staticmethod
153
+ def _xywh_to_xyxy(boxes: np.ndarray) -> np.ndarray:
154
+ out = np.empty_like(boxes)
155
+ out[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0
156
+ out[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0
157
+ out[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
158
+ out[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
159
+ return out
160
+
161
+ @staticmethod
162
+ def _hard_nms(
163
+ boxes: np.ndarray,
164
+ scores: np.ndarray,
165
+ iou_thresh: float,
166
+ ) -> np.ndarray:
167
+ if len(boxes) == 0:
168
+ return np.array([], dtype=np.intp)
169
+
170
+ boxes = np.asarray(boxes, dtype=np.float32)
171
+ scores = np.asarray(scores, dtype=np.float32)
172
+ order = np.argsort(scores)[::-1]
173
+ keep = []
174
+
175
+ while len(order) > 0:
176
+ i = order[0]
177
+ keep.append(i)
178
+ if len(order) == 1:
179
+ break
180
+
181
+ rest = order[1:]
182
+
183
+ xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
184
+ yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
185
+ xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
186
+ yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
187
+
188
+ inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
189
+
190
+ area_i = np.maximum(0.0, (boxes[i, 2] - boxes[i, 0])) * np.maximum(0.0, (boxes[i, 3] - boxes[i, 1]))
191
+ area_r = np.maximum(0.0, (boxes[rest, 2] - boxes[rest, 0])) * np.maximum(0.0, (boxes[rest, 3] - boxes[rest, 1]))
192
+
193
+ iou = inter / (area_i + area_r - inter + 1e-7)
194
+ order = rest[iou <= iou_thresh]
195
+
196
+ return np.array(keep, dtype=np.intp)
197
+
198
+ def _per_class_hard_nms(
199
+ self,
200
+ boxes: np.ndarray,
201
+ scores: np.ndarray,
202
+ cls_ids: np.ndarray,
203
+ iou_thresh: float,
204
+ ) -> np.ndarray:
205
+ if len(boxes) == 0:
206
+ return np.array([], dtype=np.intp)
207
+ all_keep: list[int] = []
208
+ for c in np.unique(cls_ids):
209
+ mask = cls_ids == c
210
+ indices = np.where(mask)[0]
211
+ keep = self._hard_nms(boxes[mask], scores[mask], iou_thresh)
212
+ all_keep.extend(indices[keep].tolist())
213
+ all_keep.sort()
214
+ return np.array(all_keep, dtype=np.intp)
215
+
216
+ @staticmethod
217
+ def _box_iou_one_to_many(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
218
+ xx1 = np.maximum(box[0], boxes[:, 0])
219
+ yy1 = np.maximum(box[1], boxes[:, 1])
220
+ xx2 = np.minimum(box[2], boxes[:, 2])
221
+ yy2 = np.minimum(box[3], boxes[:, 3])
222
+
223
+ inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
224
+
225
+ area_a = max(0.0, (box[2] - box[0]) * (box[3] - box[1]))
226
+ area_b = np.maximum(0.0, boxes[:, 2] - boxes[:, 0]) * np.maximum(0.0, boxes[:, 3] - boxes[:, 1])
227
+
228
+ return inter / (area_a + area_b - inter + 1e-7)
229
+
230
+ def _filter_sane_boxes(
231
+ self,
232
+ boxes: np.ndarray,
233
+ scores: np.ndarray,
234
+ cls_ids: np.ndarray,
235
+ orig_size: tuple[int, int],
236
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
237
+ if len(boxes) == 0:
238
+ return boxes, scores, cls_ids
239
+
240
+ orig_w, orig_h = orig_size
241
+ image_area = float(orig_w * orig_h)
242
+
243
+ keep = []
244
+ for i, box in enumerate(boxes):
245
+ x1, y1, x2, y2 = box.tolist()
246
+ bw = x2 - x1
247
+ bh = y2 - y1
248
+
249
+ if bw <= 0 or bh <= 0:
250
+ continue
251
+ if bw < self.min_w or bh < self.min_h:
252
+ continue
253
+
254
+ area = bw * bh
255
+ if area < self.min_box_area:
256
+ continue
257
+ if area > self.max_box_area_ratio * image_area:
258
+ continue
259
+
260
+ ar = max(bw / max(bh, 1e-6), bh / max(bw, 1e-6))
261
+ if ar > self.max_aspect_ratio:
262
+ continue
263
+
264
+ keep.append(i)
265
+
266
+ if not keep:
267
+ return (
268
+ np.empty((0, 4), dtype=np.float32),
269
+ np.empty((0,), dtype=np.float32),
270
+ np.empty((0,), dtype=np.int32),
271
+ )
272
+
273
+ keep = np.array(keep, dtype=np.intp)
274
+ return boxes[keep], scores[keep], cls_ids[keep]
275
+
276
+ def _decode_final_dets(
277
+ self,
278
+ preds: np.ndarray,
279
+ ratio: float,
280
+ pad: tuple[float, float],
281
+ orig_size: tuple[int, int],
282
+ ) -> list[BoundingBox]:
283
+ if preds.ndim == 3 and preds.shape[0] == 1:
284
+ preds = preds[0]
285
+
286
+ if preds.ndim != 2 or preds.shape[1] < 6:
287
+ raise ValueError(f"Unexpected ONNX final-det output shape: {preds.shape}")
288
+
289
+ boxes = preds[:, :4].astype(np.float32)
290
+ scores = preds[:, 4].astype(np.float32)
291
+ cls_ids = preds[:, 5].astype(np.int32)
292
+ cls_ids = self.cls_remap[cls_ids]
293
+
294
+ keep = scores >= self.conf_thres
295
+ boxes = boxes[keep]
296
+ scores = scores[keep]
297
+ cls_ids = cls_ids[keep]
298
+
299
+ if len(boxes) == 0:
300
+ return []
301
+
302
+ pad_w, pad_h = pad
303
+ orig_w, orig_h = orig_size
304
+
305
+ boxes[:, [0, 2]] -= pad_w
306
+ boxes[:, [1, 3]] -= pad_h
307
+ boxes /= ratio
308
+ boxes = self._clip_boxes(boxes, (orig_w, orig_h))
309
+
310
+ boxes, scores, cls_ids = self._filter_sane_boxes(boxes, scores, cls_ids, orig_size)
311
+ if len(boxes) == 0:
312
+ return []
313
+
314
+ keep_idx = self._per_class_hard_nms(boxes, scores, cls_ids, self.iou_thres)
315
+ keep_idx = keep_idx[: self.max_det]
316
+
317
+ boxes = boxes[keep_idx]
318
+ scores = scores[keep_idx]
319
+ cls_ids = cls_ids[keep_idx]
320
+
321
+ return [
322
+ BoundingBox(
323
+ x1=int(math.floor(box[0])),
324
+ y1=int(math.floor(box[1])),
325
+ x2=int(math.ceil(box[2])),
326
+ y2=int(math.ceil(box[3])),
327
+ cls_id=int(cls_id),
328
+ conf=float(conf),
329
+ )
330
+ for box, conf, cls_id in zip(boxes, scores, cls_ids)
331
+ if box[2] > box[0] and box[3] > box[1]
332
+ ]
333
+
334
+ def _decode_raw_yolo(
335
+ self,
336
+ preds: np.ndarray,
337
+ ratio: float,
338
+ pad: tuple[float, float],
339
+ orig_size: tuple[int, int],
340
+ ) -> list[BoundingBox]:
341
+ if preds.ndim != 3:
342
+ raise ValueError(f"Unexpected raw ONNX output shape: {preds.shape}")
343
+ if preds.shape[0] != 1:
344
+ raise ValueError(f"Unexpected batch dimension in raw output: {preds.shape}")
345
+
346
+ preds = preds[0]
347
+
348
+ if preds.shape[0] <= 16 and preds.shape[1] > preds.shape[0]:
349
+ preds = preds.T
350
+
351
+ if preds.ndim != 2 or preds.shape[1] < 5:
352
+ raise ValueError(f"Unexpected normalized raw output shape: {preds.shape}")
353
+
354
+ boxes_xywh = preds[:, :4].astype(np.float32)
355
+ tail = preds[:, 4:].astype(np.float32)
356
+
357
+ if tail.shape[1] == 1:
358
+ scores = tail[:, 0]
359
+ cls_ids = np.zeros(len(scores), dtype=np.int32)
360
+ elif tail.shape[1] == 2:
361
+ obj = tail[:, 0]
362
+ cls_prob = tail[:, 1]
363
+ scores = obj * cls_prob
364
+ cls_ids = np.zeros(len(scores), dtype=np.int32)
365
+ else:
366
+ obj = tail[:, 0]
367
+ class_probs = tail[:, 1:]
368
+ cls_ids = np.argmax(class_probs, axis=1).astype(np.int32)
369
+ cls_scores = class_probs[np.arange(len(class_probs)), cls_ids]
370
+ scores = obj * cls_scores
371
+
372
+ cls_ids = self.cls_remap[cls_ids]
373
+
374
+ keep = scores >= self.conf_thres
375
+ boxes_xywh = boxes_xywh[keep]
376
+ scores = scores[keep]
377
+ cls_ids = cls_ids[keep]
378
+
379
+ if len(boxes_xywh) == 0:
380
+ return []
381
+
382
+ boxes = self._xywh_to_xyxy(boxes_xywh)
383
+
384
+ keep_idx = self._per_class_hard_nms(boxes, scores, cls_ids, self.iou_thres)
385
+ keep_idx = keep_idx[: self.max_det]
386
+ boxes = boxes[keep_idx]
387
+ scores = scores[keep_idx]
388
+ cls_ids = cls_ids[keep_idx]
389
+
390
+ pad_w, pad_h = pad
391
+ orig_w, orig_h = orig_size
392
+
393
+ boxes[:, [0, 2]] -= pad_w
394
+ boxes[:, [1, 3]] -= pad_h
395
+ boxes /= ratio
396
+ boxes = self._clip_boxes(boxes, (orig_w, orig_h))
397
+
398
+ boxes, scores, cls_ids = self._filter_sane_boxes(boxes, scores, cls_ids, orig_size)
399
+ if len(boxes) == 0:
400
+ return []
401
+
402
+ return [
403
+ BoundingBox(
404
+ x1=int(math.floor(box[0])),
405
+ y1=int(math.floor(box[1])),
406
+ x2=int(math.ceil(box[2])),
407
+ y2=int(math.ceil(box[3])),
408
+ cls_id=int(cls_id),
409
+ conf=float(conf),
410
+ )
411
+ for box, conf, cls_id in zip(boxes, scores, cls_ids)
412
+ if box[2] > box[0] and box[3] > box[1]
413
+ ]
414
+
415
+ def _postprocess(
416
+ self,
417
+ output: np.ndarray,
418
+ ratio: float,
419
+ pad: tuple[float, float],
420
+ orig_size: tuple[int, int],
421
+ ) -> list[BoundingBox]:
422
+ if output.ndim == 2 and output.shape[1] >= 6:
423
+ return self._decode_final_dets(output, ratio, pad, orig_size)
424
+
425
+ if output.ndim == 3 and output.shape[0] == 1 and output.shape[2] >= 6:
426
+ return self._decode_final_dets(output, ratio, pad, orig_size)
427
+
428
+ return self._decode_raw_yolo(output, ratio, pad, orig_size)
429
+
430
+ def _predict_single(self, image: np.ndarray) -> list[BoundingBox]:
431
+ if image is None:
432
+ raise ValueError("Input image is None")
433
+ if not isinstance(image, np.ndarray):
434
+ raise TypeError(f"Input is not numpy array: {type(image)}")
435
+ if image.ndim != 3:
436
+ raise ValueError(f"Expected HWC image, got shape={image.shape}")
437
+ if image.shape[0] <= 0 or image.shape[1] <= 0:
438
+ raise ValueError(f"Invalid image shape={image.shape}")
439
+ if image.shape[2] != 3:
440
+ raise ValueError(f"Expected 3 channels, got shape={image.shape}")
441
+
442
+ if image.dtype != np.uint8:
443
+ image = image.astype(np.uint8)
444
+ image = np.ascontiguousarray(image)
445
+
446
+ input_tensor, ratio, pad, orig_size = self._preprocess(image)
447
+
448
+ expected_shape = (1, 3, self.input_height, self.input_width)
449
+ if input_tensor.shape != expected_shape:
450
+ raise ValueError(
451
+ f"Bad input tensor shape={input_tensor.shape}, expected={expected_shape}"
452
+ )
453
+
454
+ outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
455
+ det_output = outputs[0]
456
+ return self._postprocess(det_output, ratio, pad, orig_size)
457
+
458
+ def _merge_tta_consensus(
459
+ self,
460
+ boxes_orig: list[BoundingBox],
461
+ boxes_flip: list[BoundingBox],
462
+ ) -> list[BoundingBox]:
463
+ if not boxes_orig and not boxes_flip:
464
+ return []
465
+
466
+ coords_o = np.array([[b.x1, b.y1, b.x2, b.y2] for b in boxes_orig], dtype=np.float32) if boxes_orig else np.empty((0, 4), dtype=np.float32)
467
+ scores_o = np.array([b.conf for b in boxes_orig], dtype=np.float32) if boxes_orig else np.empty((0,), dtype=np.float32)
468
+
469
+ coords_f = np.array([[b.x1, b.y1, b.x2, b.y2] for b in boxes_flip], dtype=np.float32) if boxes_flip else np.empty((0, 4), dtype=np.float32)
470
+ scores_f = np.array([b.conf for b in boxes_flip], dtype=np.float32) if boxes_flip else np.empty((0,), dtype=np.float32)
471
+
472
+ accepted_boxes = []
473
+ accepted_scores = []
474
+ accepted_cls: list[int] = []
475
+
476
+ for i in range(len(coords_o)):
477
+ score = scores_o[i]
478
+ cid = boxes_orig[i].cls_id
479
+ if score >= self.conf_high:
480
+ accepted_boxes.append(coords_o[i])
481
+ accepted_scores.append(score)
482
+ accepted_cls.append(cid)
483
+ elif len(coords_f) > 0:
484
+ ious = self._box_iou_one_to_many(coords_o[i], coords_f)
485
+ j = int(np.argmax(ious))
486
+ if ious[j] >= self.tta_match_iou and boxes_flip[j].cls_id == cid:
487
+ fused_score = max(score, scores_f[j])
488
+ accepted_boxes.append(coords_o[i])
489
+ accepted_scores.append(fused_score)
490
+ accepted_cls.append(cid)
491
+
492
+ for i in range(len(coords_f)):
493
+ score = scores_f[i]
494
+ if score < self.conf_high:
495
+ continue
496
+
497
+ cid = boxes_flip[i].cls_id
498
+
499
+ if len(coords_o) == 0:
500
+ accepted_boxes.append(coords_f[i])
501
+ accepted_scores.append(score)
502
+ accepted_cls.append(cid)
503
+ continue
504
+
505
+ ious = self._box_iou_one_to_many(coords_f[i], coords_o)
506
+ if np.max(ious) < self.tta_match_iou:
507
+ accepted_boxes.append(coords_f[i])
508
+ accepted_scores.append(score)
509
+ accepted_cls.append(cid)
510
+
511
+ if not accepted_boxes:
512
+ return []
513
+
514
+ boxes = np.array(accepted_boxes, dtype=np.float32)
515
+ scores = np.array(accepted_scores, dtype=np.float32)
516
+ cls_ids = np.array(accepted_cls, dtype=np.int32)
517
+
518
+ keep = self._per_class_hard_nms(boxes, scores, cls_ids, self.iou_thres)
519
+ keep = keep[: self.max_det]
520
+
521
+ out = []
522
+ for idx in keep:
523
+ x1, y1, x2, y2 = boxes[idx].tolist()
524
+ out.append(
525
+ BoundingBox(
526
+ x1=int(math.floor(x1)),
527
+ y1=int(math.floor(y1)),
528
+ x2=int(math.ceil(x2)),
529
+ y2=int(math.ceil(y2)),
530
+ cls_id=int(cls_ids[idx]),
531
+ conf=float(scores[idx]),
532
+ )
533
+ )
534
+ return out
535
+
536
+ def _predict_tta(self, image: np.ndarray) -> list[BoundingBox]:
537
+ boxes_orig = self._predict_single(image)
538
+
539
+ flipped = cv2.flip(image, 1)
540
+ boxes_flip_raw = self._predict_single(flipped)
541
+
542
+ w = image.shape[1]
543
+ boxes_flip = [
544
+ BoundingBox(
545
+ x1=w - b.x2,
546
+ y1=b.y1,
547
+ x2=w - b.x1,
548
+ y2=b.y2,
549
+ cls_id=b.cls_id,
550
+ conf=b.conf,
551
+ )
552
+ for b in boxes_flip_raw
553
+ ]
554
+
555
+ return self._merge_tta_consensus(boxes_orig, boxes_flip)
556
+
557
+ def _adaptive_conf_threshold(self, n_raw: int) -> float:
558
+ if n_raw <= self.count_low:
559
+ return self.conf_adapt_low
560
+ if n_raw >= self.count_high:
561
+ return self.conf_adapt_high
562
+ t = (n_raw - self.count_low) / (self.count_high - self.count_low)
563
+ return self.conf_adapt_low + t * (self.conf_adapt_high - self.conf_adapt_low)
564
+
565
+ def _apply_adaptive_filter(self, boxes: list[BoundingBox]) -> list[BoundingBox]:
566
+ if not boxes:
567
+ return boxes
568
+ n_raw = len(boxes)
569
+ thresh = self._adaptive_conf_threshold(n_raw)
570
+ return [b for b in boxes if b.conf >= thresh]
571
+
572
+ def predict_batch(
573
+ self,
574
+ batch_images: list[ndarray],
575
+ offset: int,
576
+ n_keypoints: int,
577
+ ) -> list[TVFrameResult]:
578
+ results: list[TVFrameResult] = []
579
+
580
+ for frame_number_in_batch, image in enumerate(batch_images):
581
+ try:
582
+ if self.use_tta:
583
+ boxes = self._predict_tta(image)
584
+ else:
585
+ boxes = self._predict_single(image)
586
+ boxes = self._apply_adaptive_filter(boxes)
587
+ except Exception as e:
588
+ print(f"⚠️ Inference failed for frame {offset + frame_number_in_batch}: {e}")
589
+ boxes = []
590
+
591
+ results.append(
592
+ TVFrameResult(
593
+ frame_id=offset + frame_number_in_batch,
594
+ boxes=boxes,
595
+ keypoints=[(0, 0) for _ in range(max(0, int(n_keypoints)))],
596
+ )
597
+ )
598
+
599
+ return results
weights.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3844fa82f72afcbd88bd3ea4ebc2e8b84bc6ef6de0f2e8c84314ebf81e2b45
3
+ size 19407992