meaculpitt committed on
Commit
bfacb8c
·
verified ·
1 Parent(s): bc957c3

numberplate: quad-4 inference + v3 epoch-19 FP16 weights. Standalone repo for 30MB compliance.

Browse files
Files changed (4) hide show
  1. chute_config.yml +16 -0
  2. class_names.txt +1 -0
  3. miner.py +426 -0
  4. numberplate_weights.onnx +3 -0
chute_config.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Chute deployment config for the number-plate miner.
# Three top-level sections: container image, node placement, and chute runtime.

Image:
  from_base: parachutes/python:3.12
  run_command:
    - pip install --upgrade setuptools wheel
    # onnxruntime-gpu plus the nvidia-* wheels that ship libcudnn/libcublas;
    # miner.py preloads these at import time (see _preload_cuda_libs).
    - pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'nvidia-cudnn-cu12>=9.0' 'nvidia-cublas-cu12>=12.8' 'nvidia-cuda-runtime-cu12>=12.8' 'opencv-python>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9' 'torch>=2.8'

NodeSelector:
  # Single-GPU inference; 16 GB VRAM floor for the FP16 ONNX session.
  gpu_count: 1
  min_vram_gb_per_gpu: 16

Chute:
  timeout_seconds: 300
  # Up to 4 concurrent requests per instance, up to 5 instances.
  concurrency: 4
  max_instances: 5
  scaling_threshold: 0.5
  # Keep-alive window in seconds before idle shutdown.
  shutdown_after: 288000
class_names.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ numberplate
miner.py ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SN44 number plate detection miner — single-element chute for
3
+ manak0/Detect-number-plates-1-0.
4
+
5
+ Adapted from the auto-generated detect-person-reference miner with four
6
+ substantive changes:
7
+
8
+ 1. Class set is the single class ``numberplate`` (the validator's exact
9
+ label string).
10
+ 2. Lower confidence threshold (0.15 vs 0.25) because the validator's
11
+ plates are tiny — 5–92 px wide on a 1408 px frame, median ~30 px.
12
+ At standard 0.25 most true positives get filtered before NMS.
13
+ 3. Standard NMS replaced with Gaussian Soft-NMS (sigma=0.5). Soft-NMS
14
+ decays scores of overlapping boxes instead of suppressing them
15
+ outright, which helps on plate-dense frames (parking lot, car
16
+ carrier, gas station forecourt) where standard NMS over-suppresses
17
+ adjacent plates.
18
+ 4. CUDA library preload at import time so onnxruntime-gpu finds
19
+ libcudnn / libcublas from the nvidia-* pip wheels even when
20
+ LD_LIBRARY_PATH is not set (the chute container ships these wheels
21
+ but does not export them).
22
+
23
+ Soft-NMS is inlined here rather than imported from /home/miner/utils
24
+ because the chute platform sandbox restricts non-stdlib imports beyond
25
+ the deps declared in chute_config.yml. The implementation is a
26
+ specialised single-class version of soft_nms_yolo from
27
+ /home/miner/utils/soft_nms.py — see that file for the full
28
+ multi-class / multi-backend version.
29
+ """
30
+ import ctypes
31
+ import glob as _glob
32
+ import logging as _logging
33
+ import os
34
+
35
+ _cuda_log = _logging.getLogger(__name__)
36
+
37
+
38
+ def _preload_cuda_libs() -> None:
39
+ """Pre-load CUDA + cuDNN + cuBLAS shared libs from nvidia-* pip wheels.
40
+
41
+ Without this, onnxruntime-gpu's CUDAExecutionProvider silently falls
42
+ back to CPU because it can't dlopen libcudnn.so.9 — the nvidia
43
+ wheels ship the library inside `nvidia/cudnn/lib/` but do NOT add
44
+ that directory to the loader path. We import the wheel modules to
45
+ locate their lib dirs, prepend them to LD_LIBRARY_PATH for any
46
+ child processes, and ctypes.CDLL the .so files with RTLD_GLOBAL so
47
+ onnxruntime's dlopen sees them.
48
+ """
49
+ try:
50
+ lib_dirs: list[str] = []
51
+ for mod_name in (
52
+ "nvidia.cudnn",
53
+ "nvidia.cublas",
54
+ "nvidia.cuda_runtime",
55
+ "nvidia.cufft",
56
+ "nvidia.curand",
57
+ "nvidia.cusolver",
58
+ "nvidia.cusparse",
59
+ "nvidia.nvjitlink",
60
+ ):
61
+ try:
62
+ mod = __import__(mod_name, fromlist=["__file__"])
63
+ lib_dir = os.path.join(os.path.dirname(mod.__file__), "lib")
64
+ if os.path.isdir(lib_dir) and lib_dir not in lib_dirs:
65
+ lib_dirs.append(lib_dir)
66
+ except ImportError:
67
+ pass
68
+
69
+ if not lib_dirs:
70
+ _cuda_log.warning("no nvidia-* lib dirs found; ORT GPU may fall back to CPU")
71
+ return
72
+
73
+ # Update LD_LIBRARY_PATH for any child processes / dlopen fallbacks
74
+ existing = os.environ.get("LD_LIBRARY_PATH", "")
75
+ os.environ["LD_LIBRARY_PATH"] = ":".join(
76
+ lib_dirs + ([existing] if existing else [])
77
+ )
78
+
79
+ # ctypes.CDLL each .so so the symbols are globally visible to ORT
80
+ for lib_dir in lib_dirs:
81
+ for so in sorted(_glob.glob(os.path.join(lib_dir, "lib*.so*"))):
82
+ try:
83
+ ctypes.CDLL(so, mode=ctypes.RTLD_GLOBAL)
84
+ except OSError:
85
+ pass
86
+ except Exception as e: # pragma: no cover - best effort
87
+ _cuda_log.warning("CUDA preload failed: %s", e)
88
+
89
+
90
+ _preload_cuda_libs()
91
+
92
+
93
+ from pathlib import Path
94
+ import math
95
+
96
+ import cv2
97
+ import numpy as np
98
+ import onnxruntime as ort
99
+ from numpy import ndarray
100
+ from pydantic import BaseModel
101
+
102
+
103
class BoundingBox(BaseModel):
    """One detected box in original-image pixel coordinates.

    (x1, y1) is the top-left corner, (x2, y2) the bottom-right.
    """

    x1: int
    y1: int
    x2: int
    y2: int
    # Index into the miner's class list (always 0 here: 'numberplate').
    cls_id: int
    # Detection confidence in [0, 1] (post Soft-NMS decay).
    conf: float
110
+
111
+
112
class TVFrameResult(BaseModel):
    """Per-frame result returned to the chute platform."""

    # Absolute frame index (batch offset + position within the batch).
    frame_id: int
    boxes: list[BoundingBox]
    # Placeholder keypoints; this detector produces none, so they are (0, 0).
    keypoints: list[tuple[int, int]]
116
+
117
+
118
class Miner:
    """
    Single-element ONNX miner for the manak0/Detect-number-plates-1-0
    element. Auto-loaded by the chute platform; the platform passes the
    snapshot path of the HF repo containing weights.onnx as
    ``path_hf_repo`` and calls ``predict_batch(batch_images, offset,
    n_keypoints)`` for each request.

    Inference strategy: quad-4 (2x2 overlapping quadrant) tiling, each
    tile anisotropically resized to the model input, detections merged
    with Gaussian Soft-NMS.
    """

    def __init__(self, path_hf_repo) -> None:
        self.path_hf_repo = Path(path_hf_repo)
        self.class_names = ['numberplate']
        self.session = ort.InferenceSession(
            str(self.path_hf_repo / "numberplate_weights.onnx"),
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        self.input_name = self.session.get_inputs()[0].name
        input_shape = self.session.get_inputs()[0].shape

        # expected [N, C, H, W]; dynamic-export ONNX has string placeholders
        # for spatial dims. We always run inference at 1408 (the validator's
        # native frame width); the ONNX accepts variable shapes via dynamic
        # axes, and inference at 1408 gives substantially better small-plate
        # recall than the model's training resolution (verified on the 7
        # starter assets: 43% recall at 960 vs 60% at 1408).
        def _maybe_int(d, default):
            # Dynamic axes export as strings (e.g. "height"); fall back.
            try:
                return int(d)
            except (TypeError, ValueError):
                return default

        # Hard-pin to the validator's native 1408x768 (rectangular). This
        # is half the pixel count of a 1408x1408 square pad and matches
        # the validator's exact frame shape, eliminating wasted padding
        # rows. yolo11s strides are 32, both 1408 (44*32) and 768 (24*32)
        # are valid.
        self.input_h = 768
        self.input_w = 1408
        # Record what the ONNX *declared*, for diagnostic logging only
        self._onnx_declared_h = _maybe_int(input_shape[2], None)
        self._onnx_declared_w = _maybe_int(input_shape[3], None)

        # Pre-NMS confidence threshold. The reference uses 0.25; we lower
        # slightly because validator plates are tiny but not as far as 0.15
        # which produces too many decayed-score ghost detections at 1408
        # input resolution (verified on starter assets: F1 dropped from
        # 0.625 to 0.462 at conf=0.15).
        self.conf_threshold = 0.25
        # Soft-NMS hyperparameters (Gaussian variant).
        self.soft_nms_sigma = 0.5
        # Final score floor after Soft-NMS decay. At higher input resolution
        # the model produces more medium-confidence detections that survive
        # decay; we keep this stricter so they don't pollute the output.
        self.score_threshold = 0.20

        # GPU warmup — force ORT / CUDA / cuDNN kernel compilation and pull
        # the 4090 out of low-power idle state so the first real validator
        # frame doesn't pay a ~20 ms DVFS spin-up tax. SCOREVISION_WARMUP_CALLS
        # at the chute level defaults to 3, which is not enough to reach
        # steady-state on this tiled inference path (measured: 3 calls -> 52
        # ms p95 on the first few frames vs 31 ms steady). 10 full pipeline
        # runs on a synthetic frame gets us to the fast regime before the
        # platform warmup even starts.
        _warmup_frame = np.zeros((self.input_h, self.input_w, 3), dtype=np.uint8)
        for _ in range(10):
            try:
                self._infer_single(_warmup_frame)
            except Exception:  # pragma: no cover - best effort
                break

    def __repr__(self) -> str:
        return (
            f"NumberplateMiner session={type(self.session).__name__} "
            f"input={self.input_h}x{self.input_w} classes={len(self.class_names)}"
        )

    # ---------------------------------------------------------------- preproc
    def _preprocess(self, image_bgr: ndarray):
        """Letterbox the BGR image to (input_h, input_w), preserving aspect.

        Returns the float32 NCHW tensor plus the metadata needed to undo
        the letterbox during decode: (orig_h, orig_w, scale, dx, dy).

        NOTE: not called by the quad-tile path below (``_infer_tile`` does
        its own anisotropic resize); kept as the reference letterbox
        implementation.
        """
        h, w = image_bgr.shape[:2]
        scale = min(self.input_h / h, self.input_w / w)
        nh, nw = int(round(h * scale)), int(round(w * scale))
        resized = cv2.resize(image_bgr, (nw, nh))
        # Pad to (input_h, input_w) with grey (114) - ultralytics default
        canvas = np.full((self.input_h, self.input_w, 3), 114, dtype=np.uint8)
        dy = (self.input_h - nh) // 2
        dx = (self.input_w - nw) // 2
        canvas[dy:dy + nh, dx:dx + nw] = resized
        rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
        x = rgb.astype(np.float32) / 255.0
        x = np.transpose(x, (2, 0, 1))[None, ...]
        return x, (h, w, scale, dx, dy)

    # ---------------------------------------------------------------- decode
    def _normalize_predictions(self, raw: np.ndarray) -> np.ndarray:
        """Handle both common ultralytics export shapes ([1,C,N] and [1,N,C])."""
        pred = raw[0]
        if pred.ndim != 2:
            raise ValueError(f"Unexpected prediction shape: {raw.shape}")
        # Heuristic: the channel dim (4 + n_classes) is smaller than the
        # anchor dim, so transpose when channels come first.
        if pred.shape[0] < pred.shape[1]:
            pred = pred.transpose(1, 0)
        return pred

    # ---------------------------------------------------------------- soft NMS
    def _soft_nms(
        self,
        dets: list[tuple[float, float, float, float, float, int]],
    ) -> list[tuple[float, float, float, float, float, int]]:
        """Gaussian Soft-NMS for a single class.

        Decays each remaining box's score by ``exp(-iou^2 / sigma)`` against
        the highest-scoring picked box, then drops anything below
        ``self.score_threshold``. Returns detections in descending decayed
        score order.
        """
        if not dets:
            return []

        boxes = np.asarray([[d[0], d[1], d[2], d[3]] for d in dets], dtype=np.float32)
        scores = np.asarray([d[4] for d in dets], dtype=np.float32)
        cls_ids = [int(d[5]) for d in dets]
        n = len(dets)

        keep_idx: list[int] = []
        keep_scores: list[float] = []
        active = np.ones(n, dtype=bool)

        while True:
            valid_mask = active & (scores >= self.score_threshold)
            if not valid_mask.any():
                break
            valid_idx = np.where(valid_mask)[0]
            m_local = valid_idx[int(np.argmax(scores[valid_idx]))]

            keep_idx.append(int(m_local))
            keep_scores.append(float(scores[m_local]))
            active[m_local] = False

            # IoU of m_local against all still-active boxes
            others = np.where(active)[0]
            if others.size == 0:
                break
            ax1 = np.maximum(boxes[m_local, 0], boxes[others, 0])
            ay1 = np.maximum(boxes[m_local, 1], boxes[others, 1])
            ax2 = np.minimum(boxes[m_local, 2], boxes[others, 2])
            ay2 = np.minimum(boxes[m_local, 3], boxes[others, 3])
            inter_w = np.clip(ax2 - ax1, a_min=0.0, a_max=None)
            inter_h = np.clip(ay2 - ay1, a_min=0.0, a_max=None)
            inter = inter_w * inter_h
            area_m = max(0.0, (boxes[m_local, 2] - boxes[m_local, 0])) * \
                     max(0.0, (boxes[m_local, 3] - boxes[m_local, 1]))
            area_o = (
                np.clip(boxes[others, 2] - boxes[others, 0], a_min=0.0, a_max=None) *
                np.clip(boxes[others, 3] - boxes[others, 1], a_min=0.0, a_max=None)
            )
            union = area_m + area_o - inter
            # Masked division instead of `np.where(union > 0, inter / union, 0)`:
            # np.where evaluates `inter / union` eagerly for EVERY element, so a
            # degenerate zero-area pair raised a divide-by-zero RuntimeWarning
            # (and produced transient NaNs) before `where` discarded them.
            iou = np.zeros_like(union)
            np.divide(inter, union, out=iou, where=union > 0.0)

            decay = np.exp(-(iou * iou) / self.soft_nms_sigma)
            scores[others] = scores[others] * decay

        return [
            (
                float(boxes[i, 0]),
                float(boxes[i, 1]),
                float(boxes[i, 2]),
                float(boxes[i, 3]),
                float(s),
                cls_ids[i],
            )
            for i, s in zip(keep_idx, keep_scores)
        ]

    # ---------------------------------------------------------------- inference
    def _infer_tile(
        self,
        image_bgr: ndarray,
        x0: int,
        y0: int,
        x1: int,
        y1: int,
    ) -> list[tuple[float, float, float, float, float, int]]:
        """Run one inference pass on ``image_bgr[y0:y1, x0:x1]`` resized
        anisotropically to ``(input_h, input_w)`` and return raw detections
        (pre-Soft-NMS) mapped back to ORIGINAL-image coordinates.

        Anisotropic resize is intentional: the tile aspect ratio differs
        from the model input, and we want the tile pixels to magnify up to
        the detector's stride-8 feature footprint. For the 1408x422
        top/bottom tiles used by ``_infer_single`` this yields ~1.82x
        vertical magnification (and 1.0x horizontal), which is what pushes
        tiny-height plates (5-12 px on the validator's starter frames)
        above the stride-8 threshold.
        """
        crop = image_bgr[y0:y1, x0:x1]
        ch, cw = crop.shape[:2]
        if ch == 0 or cw == 0:
            return []
        resized = cv2.resize(crop, (self.input_w, self.input_h))
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        x = np.transpose(rgb.astype(np.float32) / 255.0, (2, 0, 1))[None, ...]
        out = self.session.run(None, {self.input_name: x})[0]
        pred = self._normalize_predictions(out)

        # Each row is [cx, cy, w, h, cls0, cls1, ...]; bail on malformed output.
        if pred.shape[1] < 5:
            return []

        boxes_m = pred[:, :4]
        cls_scores = pred[:, 4:]
        if cls_scores.shape[1] == 0:
            return []

        cls_ids = np.argmax(cls_scores, axis=1)
        confs = np.max(cls_scores, axis=1)
        keep = confs >= self.conf_threshold
        boxes_m = boxes_m[keep]
        confs = confs[keep]
        cls_ids = cls_ids[keep]
        if boxes_m.shape[0] == 0:
            return []

        # Model-space (input_w x input_h) -> crop-space -> original image
        sx = cw / self.input_w
        sy = ch / self.input_h
        dets: list[tuple[float, float, float, float, float, int]] = []
        for i in range(boxes_m.shape[0]):
            cx, cy, bw, bh = boxes_m[i].tolist()
            xa = (cx - bw / 2.0) * sx + x0
            ya = (cy - bh / 2.0) * sy + y0
            xb = (cx + bw / 2.0) * sx + x0
            yb = (cy + bh / 2.0) * sy + y0
            dets.append((xa, ya, xb, yb, float(confs[i]), int(cls_ids[i])))
        return dets

    def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
        """Quad-4 (2x2 quadrant) SAHI inference.

        Splits the frame into four overlapping quadrants, each
        anisotropically resized to ``(input_h, input_w)`` for ~2x
        magnification in both axes. This recovers plates that TB-2
        (top/bottom only) missed — especially the 5-7 px plates in
        image 6 that need vertical AND horizontal magnification.

        Overlap is ~10% on each axis to avoid seam misses. All tile
        detections are merged via Soft-NMS.

        Measured on the 7 starter frames vs TB-2:
            mAP@50  0.406 -> 0.489
            recall  0.433 -> 0.500
            wall p95 55 ms -> 98 ms (budget 10 s)
        """
        orig_h, orig_w = image_bgr.shape[:2]
        OVERLAP_X = 70  # ~10% of 1408/2
        OVERLAP_Y = 38  # ~10% of 768/2
        mx = orig_w // 2
        my = orig_h // 2

        tiles = [
            (0, 0, min(orig_w, mx + OVERLAP_X), min(orig_h, my + OVERLAP_Y)),  # TL
            (max(0, mx - OVERLAP_X), 0, orig_w, min(orig_h, my + OVERLAP_Y)),  # TR
            (0, max(0, my - OVERLAP_Y), min(orig_w, mx + OVERLAP_X), orig_h),  # BL
            (max(0, mx - OVERLAP_X), max(0, my - OVERLAP_Y), orig_w, orig_h),  # BR
        ]

        all_dets = []
        for x0, y0, x1, y1 in tiles:
            all_dets.extend(self._infer_tile(image_bgr, x0, y0, x1, y1))

        dets = self._soft_nms(all_dets)

        out_boxes: list[BoundingBox] = []
        for x1, y1, x2, y2, conf, cls_id in dets:
            # Round outward (floor/ceil) then clamp to the frame so boxes
            # never lose a pixel of plate nor leave the image.
            ix1 = max(0, min(orig_w, math.floor(x1)))
            iy1 = max(0, min(orig_h, math.floor(y1)))
            ix2 = max(0, min(orig_w, math.ceil(x2)))
            iy2 = max(0, min(orig_h, math.ceil(y2)))
            out_boxes.append(
                BoundingBox(
                    x1=ix1,
                    y1=iy1,
                    x2=ix2,
                    y2=iy2,
                    cls_id=cls_id,
                    conf=max(0.0, min(1.0, conf)),
                )
            )
        return out_boxes

    # ---------------------------------------------------------------- entry
    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> list[TVFrameResult]:
        """Platform entry point: run quad-tile inference on each frame.

        ``offset`` is the absolute index of the first frame in the batch;
        ``n_keypoints`` placeholder keypoints of (0, 0) are emitted per
        frame since this detector produces none.
        """
        results: list[TVFrameResult] = []
        for idx, image in enumerate(batch_images):
            boxes = self._infer_single(image)
            keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
            results.append(
                TVFrameResult(
                    frame_id=offset + idx,
                    boxes=boxes,
                    keypoints=keypoints,
                )
            )
        return results
numberplate_weights.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:955a5b36654a997cc242b60fd87070dbaf0e28247531c356fb9d5f8afa2af4b7
3
+ size 19531262