meaculpitt committed on
Commit
1a85c81
Β·
verified Β·
1 Parent(s): 65b9551

scorevision: push artifact

Browse files
chute_config.yml CHANGED
@@ -3,13 +3,23 @@ Image:
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
  - pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'nvidia-cudnn-cu12' 'nvidia-cublas-cu12'
6
- 'opencv-python-headless>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0'
7
- 'pyyaml>=6.0' 'aiohttp>=3.9' 'ensemble-boxes>=1.0' 'torch>=2.6,<3.0'
 
 
8
  NodeSelector:
9
  gpu_count: 1
10
  min_vram_gb_per_gpu: 16
11
  max_hourly_price_per_gpu: 2.0
12
  exclude:
 
 
 
 
 
 
 
 
13
  - '5090'
14
  - b200
15
  - h200
 
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
  - pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'nvidia-cudnn-cu12' 'nvidia-cublas-cu12'
6
+ 'nvidia-cuda-runtime-cu12' 'nvidia-cufft-cu12' 'nvidia-curand-cu12'
7
+ 'nvidia-cusolver-cu12' 'nvidia-cusparse-cu12' 'nvidia-nvjitlink-cu12'
8
+ 'tensorrt>=10.0' 'opencv-python-headless>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4'
9
+ 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9' 'ensemble-boxes>=1.0' 'torch>=2.6,<3.0'
10
  NodeSelector:
11
  gpu_count: 1
12
  min_vram_gb_per_gpu: 16
13
  max_hourly_price_per_gpu: 2.0
14
  exclude:
15
+ - a40
16
+ - l4
17
+ - a5000
18
+ - a4000
19
+ - '3090'
20
+ - a4000_ada
21
+ - a10
22
+ - a6000
23
  - '5090'
24
  - b200
25
  - h200
face_weights.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a
3
+ size 2524817
miner.py CHANGED
@@ -1,20 +1,33 @@
1
  """
2
- Score Vision SN44 β€” Unified miner v3.7 (2026-04-02). SAHI-style tiled person inference.
3
- Dual-model: vehicle (YOLO11m INT8 1280) + person (YOLO26s FP16 960 end2end).
 
4
  Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
5
  Person weights loaded from primary HF repo (template downloads automatically).
6
 
7
  Vehicle model (vehicle_weights.onnx):
8
  Trained classes: 0=car, 1=bus, 2=truck, 3=motorcycle
9
- Remapped to output: 1=car, 4=bus(filtered), 2=truck, 3=motorcycle
10
- Bus remapped to cls_id=4 to avoid collision with person cls_id=0.
 
 
11
 
12
  Person model (person_weights.onnx):
13
  YOLO26s FP16 960px end2end [1,300,6]. Single class: 0=person.
 
 
14
  SAHI-style tiling: full + 2 adaptive tiles + flip TTA, max-conf NMS merge.
15
 
16
- Both models run on every image. All detections merged.
17
- Vehicle cls_id=4 (bus) filtered by validator (out of range for both elements).
 
 
 
 
 
 
 
 
18
  Vehicle eval uses cls_id 1-3. Person eval uses cls_id 0 only.
19
  """
20
 
@@ -26,10 +39,13 @@ import logging as _logging
26
  _cuda_log = _logging.getLogger(__name__)
27
 
28
  def _preload_cuda_libs():
29
- """Pre-load CUDA libs from pip nvidia packages so onnxruntime-gpu finds them."""
30
  try:
31
  lib_dirs = []
32
- for mod_name in ['nvidia.cudnn', 'nvidia.cublas']:
 
 
 
33
  try:
34
  mod = __import__(mod_name, fromlist=['__file__'])
35
  lib_dir = os.path.join(os.path.dirname(mod.__file__), 'lib')
@@ -37,6 +53,30 @@ def _preload_cuda_libs():
37
  lib_dirs.append(lib_dir)
38
  except ImportError:
39
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  if not lib_dirs:
41
  return
42
  existing = os.environ.get('LD_LIBRARY_PATH', '')
@@ -45,11 +85,10 @@ def _preload_cuda_libs():
45
  for so in sorted(_glob.glob(os.path.join(lib_dir, 'lib*.so*'))):
46
  try:
47
  ctypes.CDLL(so, mode=ctypes.RTLD_GLOBAL)
48
- _cuda_log.info(f'Preloaded CUDA lib: {os.path.basename(so)}')
49
  except OSError:
50
  pass
51
  except Exception as e:
52
- _cuda_log.warning(f'CUDA preload error: {e}')
53
 
54
  _preload_cuda_libs()
55
 
@@ -164,18 +203,72 @@ logger = logging.getLogger(__name__)
164
 
165
  # ── Vehicle config ──────────────────────────────────────────────────────────
166
  VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 4, 2: 2, 3: 3} # busβ†’4 (avoid person cls_id=0 collision)
 
167
  VEH_NUM_CLASSES = 4
168
- VEH_CONF_THRES = 0.35
169
- VEH_TTA_CONF = 0.10
170
  VEH_NMS_IOU = 0.50
171
 
172
- # ── Vehicle box sanity filters ─────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  VEH_MIN_WH = 8
174
- VEH_MIN_AREA = 196
175
  VEH_MAX_ASPECT = 8.0
176
  VEH_MAX_AREA_RATIO = 0.95
177
  VEH_MAX_DET = 150
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  # ── Person config (TTA consensus) ───────────────────────────────────────────
180
  PER_CONF_LOW = 0.45
181
  PER_CONF_HIGH = 0.58
@@ -194,6 +287,51 @@ PER_TILE_MIN_DIM_RATIO = 1.15 # tile when image dim > model_dim * this (~1104p
194
  PER_TILE_CONF = 0.40 # lower threshold for tile passes (NMS handles FP)
195
  PER_NMS_IOU = 0.50 # NMS IoU for merging across passes (max-conf wins)
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  # ── Shared ──────────────────────────────────────────────────────────────────
198
  WBF_SKIP_THR = 0.0001
199
 
@@ -385,6 +523,7 @@ class Miner:
385
 
386
  # Vehicle model β€” download from secondary HF repo with safety guard
387
  t0 = time.monotonic()
 
388
  try:
389
  from huggingface_hub import snapshot_download as _sd
390
  veh_path = Path(_sd(VEHICLE_HF_REPO))
@@ -406,15 +545,75 @@ class Miner:
406
  self.veh_h = int(veh_shape[2])
407
  self.veh_w = int(veh_shape[3])
408
 
409
- # Person model β€” from primary HF repo (template downloads automatically)
 
410
  self.per_session = ort.InferenceSession(
411
- str(path_hf_repo / "person_weights.onnx"),
412
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
413
  )
414
  self.per_input_name = self.per_session.get_inputs()[0].name
415
  per_shape = self.per_session.get_inputs()[0].shape
416
  self.per_h = int(per_shape[2])
417
  self.per_w = int(per_shape[3])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
 
419
  # Thread pool for parallel inference
420
  self._executor = ThreadPoolExecutor(max_workers=2)
@@ -423,11 +622,61 @@ class Miner:
423
  veh_prov = self.veh_session.get_providers()
424
  per_prov = self.per_session.get_providers()
425
  logger.info(f"Vehicle ORT providers: {veh_prov}")
426
- logger.info(f"Person ORT providers: {per_prov}")
427
  logger.info(f"TTA={ENABLE_TTA} PARALLEL={ENABLE_PARALLEL}")
428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
  def __repr__(self) -> str:
430
- return "Unified Miner v3 β€” dual-model vehicle+person (repo-split, parallel, TTA-configurable)"
 
431
 
432
  # ── Vehicle preprocessing (letterbox) ───────────────────────────────────
433
 
@@ -476,23 +725,31 @@ class Miner:
476
  return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)
477
 
478
  def _infer_vehicle(self, image_bgr):
 
 
 
 
 
 
 
 
 
 
479
  oh, ow = image_bgr.shape[:2]
480
 
481
- if ENABLE_TTA:
482
- boxes1, confs1, cls1 = self._veh_run_pass(image_bgr, VEH_TTA_CONF)
483
- flipped = cv2.flip(image_bgr, 1)
484
- boxes2, confs2, cls2 = self._veh_run_pass(flipped, VEH_TTA_CONF)
485
- if len(boxes2):
486
- boxes2[:, 0], boxes2[:, 2] = ow - boxes2[:, 2], ow - boxes2[:, 0]
487
- parts = [(b, s, c) for b, s, c in
488
- [(boxes1, confs1, cls1), (boxes2, confs2, cls2)] if len(b)]
489
- if not parts:
490
- return []
491
- boxes = np.concatenate([p[0] for p in parts])
492
- confs = np.concatenate([p[1] for p in parts])
493
- cls_ids = np.concatenate([p[2] for p in parts])
494
- else:
495
- boxes, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_CONF_THRES)
496
 
497
  if len(boxes) == 0:
498
  return []
@@ -507,27 +764,45 @@ class Miner:
507
  if len(boxes) == 0:
508
  return []
509
 
510
- # Global confidence filter (needed after TTA where lower threshold was used)
511
- keep = confs >= VEH_CONF_THRES
512
- if not keep.any():
513
- return []
514
- boxes, confs, out_cls = boxes[keep], confs[keep], out_cls[keep]
515
-
516
- # Sanity filters
517
  img_area = float(oh * ow)
518
  sane = []
519
  for i in range(len(boxes)):
 
 
 
 
 
 
 
 
 
 
 
520
  bw = boxes[i, 2] - boxes[i, 0]
521
  bh = boxes[i, 3] - boxes[i, 1]
 
 
522
  if bw < VEH_MIN_WH or bh < VEH_MIN_WH:
523
  continue
 
524
  area = bw * bh
525
- if area < VEH_MIN_AREA:
 
 
 
526
  continue
527
- if max(bw, bh) / max(min(bw, bh), 1e-6) > VEH_MAX_ASPECT:
 
 
 
 
528
  continue
 
 
529
  if area / img_area > VEH_MAX_AREA_RATIO:
530
  continue
 
531
  sane.append(i)
532
 
533
  if not sane:
@@ -552,6 +827,373 @@ class Miner:
552
  ))
553
  return out
554
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
555
  # ── Person preprocessing (letterbox) ──────────────────────────────────
556
 
557
  def _per_letterbox(self, img):
@@ -705,15 +1347,386 @@ class Miner:
705
 
706
  return np.array(keep_b), np.array(keep_s)
707
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
708
  def _infer_person(self, image_bgr):
709
- """Person detection with SAHI-inspired tiled inference.
710
 
711
  Pipeline:
712
- 1. Full-image pass (catches large/medium people, low effective resolution)
713
  2. 2 tiled passes (higher effective resolution for small/distant people)
714
  3. Flip TTA pass if time budget allows
715
- 4. Max-confidence NMS merge (preserves sharp scores for FP scoring)
716
  5. Sanity filters
 
717
  """
718
  oh, ow = image_bgr.shape[:2]
719
  t_start = time.monotonic()
@@ -722,7 +1735,7 @@ class Miner:
722
  all_boxes = [] # list of [N, 4] arrays
723
  all_confs = [] # list of [N] arrays
724
 
725
- # Pass 1: full image
726
  boxes_full, confs_full = self._per_run_pass(image_bgr, PER_CONF_LOW)
727
  if len(boxes_full) > 0:
728
  all_boxes.append(boxes_full)
@@ -755,10 +1768,12 @@ class Miner:
755
  if not all_boxes:
756
  return []
757
 
758
- # Merge all detections with max-confidence NMS
759
  merged_b = np.concatenate(all_boxes)
760
  merged_s = np.concatenate(all_confs)
761
- merged_b, merged_s = self._nms_max_conf(merged_b, merged_s, PER_NMS_IOU)
 
 
762
 
763
  if len(merged_b) == 0:
764
  return []
@@ -787,11 +1802,17 @@ class Miner:
787
  cls_id=0,
788
  conf=max(0.0, min(1.0, float(merged_s[i]))),
789
  ))
 
 
 
 
 
790
  return out
791
 
792
  # ── Unified inference ───────────────────────────────────────────────────
793
 
794
  def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
 
795
  if ENABLE_PARALLEL:
796
  veh_future = self._executor.submit(self._infer_vehicle, image_bgr)
797
  per_future = self._executor.submit(self._infer_person, image_bgr)
@@ -800,6 +1821,11 @@ class Miner:
800
  else:
801
  vehicle_boxes = self._infer_vehicle(image_bgr)
802
  person_boxes = self._infer_person(image_bgr)
 
 
 
 
 
803
  return vehicle_boxes + person_boxes
804
 
805
  # -- Replay buffer -------------------------------------------------------
@@ -877,4 +1903,4 @@ class Miner:
877
  ).start()
878
 
879
  return results
880
- # Miner v3.4 β€” consensus TTA, per-class NMS boost, vehicle sanity filters, floor/ceil decode 20260401
 
1
  """
2
+ Score Vision SN44 β€” Unified miner v3.15 (2026-04-02). Background TRT engine build.
3
+ Dual-model: vehicle (YOLO11m INT8 1280, CUDA) + person (YOLO26s FP16 960 end2end, TRT).
4
+ Pose model: YOLOv8n-pose FP16 640 for false-positive filtering + keypoint box refinement.
5
  Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
6
  Person weights loaded from primary HF repo (template downloads automatically).
7
 
8
  Vehicle model (vehicle_weights.onnx):
9
  Trained classes: 0=car, 1=bus, 2=truck, 3=motorcycle
10
+ Output: 1=car, 2=truck, 3=motorcycle. Bus (cls_id=4) SUPPRESSED β€” not scored by validator.
11
+ Per-class confidence thresholds: car 0.45, truck 0.45, motorcycle 0.35.
12
+ Per-class aspect ratio bounds for FP filtering.
13
+ Flip TTA always enabled β€” compensates for higher confidence thresholds.
14
 
15
  Person model (person_weights.onnx):
16
  YOLO26s FP16 960px end2end [1,300,6]. Single class: 0=person.
17
+ Background TRT build: starts on CUDA immediately, builds TRT FP16 engine in background
18
+ thread (~18min on fresh node), swaps to TRT atomically when ready. Cached thereafter.
19
  SAHI-style tiling: full + 2 adaptive tiles + flip TTA, max-conf NMS merge.
20
 
21
+ Pose model (pose_weights.onnx):
22
+ YOLOv8n-pose FP16 640px [1,56,8400]. 17 COCO keypoints.
23
+ Runs once on full image after person detection.
24
+ Anatomical keypoint scoring: weighted per-keypoint sum (head 0.38, upper 0.32, lower 0.30).
25
+ 1. Head keypoints visible β†’ never suppress, always refine box.
26
+ 2. Score >= 0.15 β†’ keep + refine. Score > 0 β†’ keep as-is. Score == 0 + large + low-conf β†’ suppress.
27
+ 3. Box refinement: blend detected box with tight keypoint bbox for better fit.
28
+ Face detector (optional): if face_session loaded, face inside box β†’ never suppress.
29
+
30
+ Both vehicle + person models run on every image. All detections merged.
31
  Vehicle eval uses cls_id 1-3. Person eval uses cls_id 0 only.
32
  """
33
 
 
39
  _cuda_log = _logging.getLogger(__name__)
40
 
41
  def _preload_cuda_libs():
42
+ """Pre-load CUDA + TensorRT libs from pip packages so ORT GPU/TRT providers work."""
43
  try:
44
  lib_dirs = []
45
+ # CUDA libs from nvidia pip packages
46
+ for mod_name in ['nvidia.cudnn', 'nvidia.cublas', 'nvidia.cuda_runtime',
47
+ 'nvidia.cufft', 'nvidia.curand', 'nvidia.cusolver',
48
+ 'nvidia.cusparse', 'nvidia.nvjitlink']:
49
  try:
50
  mod = __import__(mod_name, fromlist=['__file__'])
51
  lib_dir = os.path.join(os.path.dirname(mod.__file__), 'lib')
 
53
  lib_dirs.append(lib_dir)
54
  except ImportError:
55
  pass
56
+
57
+ # TensorRT libs β€” search site-packages for tensorrt_libs directory
58
+ import sys as _sys_inner
59
+ _trt_found = False
60
+ for p in _sys_inner.path:
61
+ candidate = os.path.join(p, 'tensorrt_libs')
62
+ if os.path.isdir(candidate):
63
+ lib_dirs.append(candidate)
64
+ _trt_found = True
65
+ break
66
+ # Broader search if not found in sys.path
67
+ if not _trt_found:
68
+ for base in ['/usr/local/lib', '/usr/lib', os.path.expanduser('~/.local/lib'),
69
+ '/home/miner/.local/lib']:
70
+ for root, dirs, _ in os.walk(base):
71
+ if 'tensorrt_libs' in dirs:
72
+ lib_dirs.append(os.path.join(root, 'tensorrt_libs'))
73
+ _trt_found = True
74
+ break
75
+ if root.count(os.sep) - base.count(os.sep) > 4:
76
+ break
77
+ if _trt_found:
78
+ break
79
+
80
  if not lib_dirs:
81
  return
82
  existing = os.environ.get('LD_LIBRARY_PATH', '')
 
85
  for so in sorted(_glob.glob(os.path.join(lib_dir, 'lib*.so*'))):
86
  try:
87
  ctypes.CDLL(so, mode=ctypes.RTLD_GLOBAL)
 
88
  except OSError:
89
  pass
90
  except Exception as e:
91
+ _cuda_log.warning(f'CUDA/TRT preload error: {e}')
92
 
93
  _preload_cuda_libs()
94
 
 
203
 
204
# ── Vehicle config ───────────────────────────────────────────────────────────
VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 4, 2: 2, 3: 3}  # bus→4 (avoid person cls_id=0 collision)
VEH_SKIP_CLS = {4}      # Bus: not scored by validator, just generates FP. Skip entirely.
VEH_NUM_CLASSES = 4
VEH_CONF_THRES = 0.30   # Low decode threshold for TTA (final filter is per-class)
VEH_TTA_CONF = 0.20     # TTA flip pass decode threshold
VEH_NMS_IOU = 0.50

# ── Per-class vehicle confidence thresholds (output cls_id) ────────────────
# Raising from uniform 0.35: reduces FP (avg 4.1 FFPI → target <2.0)
VEH_CLASS_CONF: dict[int, float] = {
    1: 0.45,  # car — most FP-prone class (75% of training data, overconfident)
    2: 0.45,  # truck — moderate raise
    3: 0.35,  # motorcycle — keep lower (small targets, easy to miss)
    4: 1.0,   # bus — effectively suppressed (not scored anyway)
}

# ── Per-class vehicle maximum aspect ratio ─────────────────────────────────
# ratio = max(w, h) / min(w, h); each value is the per-class MAX allowed ratio
# (a single float, not a (min, max) pair). Generous bounds to avoid
# suppressing valid detections.
VEH_CLASS_ASPECT: dict[int, float] = {
    1: 5.0,  # car — rarely > 5:1 from any angle
    2: 6.0,  # truck — can be elongated
    3: 4.5,  # motorcycle — compact, rarely very elongated
    4: 8.0,  # bus (filtered anyway)
}

# ── Per-class minimum area (pixels) ───────────────────────────────────────
VEH_CLASS_MIN_AREA: dict[int, int] = {
    1: 196,  # car — 14x14 min
    2: 256,  # truck — 16x16 min (should be at least medium-sized)
    3: 100,  # motorcycle — 10x10 min (can be very small in distance)
    4: 400,  # bus — 20x20 min
}

# ── Vehicle box sanity filters (global fallbacks) ─────────────────────────
VEH_MIN_WH = 8
VEH_MIN_AREA = 100
VEH_MAX_ASPECT = 8.0
VEH_MAX_AREA_RATIO = 0.95
VEH_MAX_DET = 150

# ── Vehicle parts confirmation config ────────────────────────────────────
# Cross-validates vehicle detections using person detections, OpenCV analysis,
# and optional license plate detector. Small/distant vehicles exempt.
VEH_PARTS_ENABLED = True            # Master switch for parts confirmation
VEH_PARTS_SMALL_AREA = 0.004        # Below this area ratio: exempt from suppression
VEH_PARTS_FP_CONF = 0.50            # Below this conf + large + unconfirmed → suppress
VEH_PARTS_FP_CONF_STRICT = 0.55     # Stricter threshold when plate model loaded but no plate
VEH_PARTS_FP_AREA = 0.03            # Above this area ratio → eligible for FP suppression
# Confidence boosts for confirmed parts (additive)
VEH_PARTS_BOOST_DRIVER = 0.08       # Person in driver/passenger region
VEH_PARTS_BOOST_RIDER = 0.10        # Person on motorcycle (overlap + optional lean)
VEH_PARTS_BOOST_HL = 0.05           # Headlight pair detected
VEH_PARTS_BOOST_PLATE = 0.12        # License plate detected (Phase 2)
VEH_PARTS_BOOST_WINDOW = 0.06       # Bus window pattern on truck
# Headlight detection thresholds
VEH_PARTS_HL_MIN_PX = 60            # Min vehicle width (px) for headlight check
VEH_PARTS_HL_BRIGHT = 200           # Grayscale threshold for bright spots
VEH_PARTS_HL_MIN_BLOB = 15          # Min contour area for headlight candidate
# Window pattern detection (bus/coach)
VEH_PARTS_WINDOW_MIN_PX = 100       # Min vehicle width for window pattern check
VEH_PARTS_WINDOW_MIN_PEAKS = 3      # Min periodic edge peaks for window confirmation
# Motorcycle rider pose
VEH_PARTS_RIDER_LEAN_DEG = 15.0     # Min torso lean from vertical (degrees) for rider pose
# Plate detection thresholds
VEH_PARTS_PLATE_MIN_PX = 120        # Min vehicle width (px) for plate detection (medium+ vehicles only)
VEH_PARTS_PLATE_CONF = 0.35         # Min plate detection confidence
273
  PER_CONF_LOW = 0.45
274
  PER_CONF_HIGH = 0.58
 
287
  PER_TILE_CONF = 0.40 # lower threshold for tile passes (NMS handles FP)
288
  PER_NMS_IOU = 0.50 # NMS IoU for merging across passes (max-conf wins)
289
 
290
# ── Pose FP filter + box refinement config ──────────────────────────────────
POSE_CONF_THRESH = 0.25   # Minimum confidence for pose detection
POSE_NMS_IOU = 0.65       # NMS IoU threshold for pose detections
POSE_MATCH_IOU = 0.30     # IoU threshold to match pose to person box
POSE_KP_CONF = 0.3        # Keypoint visibility threshold
POSE_FP_MAX_CONF = 0.65   # Max conf below which unmatched large boxes are suppressed
POSE_FP_MIN_AREA = 0.04   # Min area ratio (of image) for FP suppression to apply
POSE_REFINE_BLEND = 0.25  # Blend factor for keypoint box refinement (0=original, 1=keypoint)
POSE_KP_PAD = 0.10        # Padding around keypoint tight bbox

# ── Anatomical keypoint scoring ─────────────────────────────────────────────
# COCO keypoints: 0=nose 1=l_eye 2=r_eye 3=l_ear 4=r_ear
# 5=l_shoulder 6=r_shoulder 7=l_elbow 8=r_elbow 9=l_wrist 10=r_wrist
# 11=l_hip 12=r_hip 13=l_knee 14=r_knee 15=l_ankle 16=r_ankle
POSE_HEAD_KP = [0, 1, 2, 3, 4]            # nose + eyes + ears
POSE_UPPER_KP = [5, 6, 7, 8, 9, 10]       # shoulders + elbows + wrists
POSE_LOWER_KP = [11, 12, 13, 14, 15, 16]  # hips + knees + ankles
# Per-keypoint weights (head > upper > lower): head 0.38, upper 0.32, lower 0.30.
# FIX: previous table summed to 0.95 (lower group was 0.25, with asymmetric
# ankle weights 0.03/0.04) while claiming a 1.0 total. Lower-body weights are
# now left/right symmetric and the groups sum to exactly 0.38 + 0.32 + 0.30 = 1.0.
POSE_KP_WEIGHTS = np.array([
    0.12,  # 0 nose — strongest single indicator
    0.08,  # 1 left_eye
    0.08,  # 2 right_eye
    0.05,  # 3 left_ear
    0.05,  # 4 right_ear
    0.07,  # 5 left_shoulder
    0.07,  # 6 right_shoulder
    0.05,  # 7 left_elbow
    0.05,  # 8 right_elbow
    0.04,  # 9 left_wrist
    0.04,  # 10 right_wrist
    0.06,  # 11 left_hip
    0.06,  # 12 right_hip
    0.05,  # 13 left_knee
    0.05,  # 14 right_knee
    0.04,  # 15 left_ankle
    0.04,  # 16 right_ankle
], dtype=np.float32)  # sums to 1.0
POSE_ANAT_REFINE_THRESH = 0.15   # Score above which we refine box with keypoints
POSE_ANAT_SUPPRESS_THRESH = 0.0  # Score at or below which suppression is considered

# ── TensorRT engine cache config ────────────────────────────────────────────
TRT_CACHE_PATH = "/tmp/trt_engine_cache"  # ORT TRT provider caches compiled engines here
TRT_FP16 = True        # Build the engine in FP16
TRT_WORKSPACE_GB = 4   # TRT builder workspace budget (GB)
335
  # ── Shared ──────────────────────────────────────────────────────────────────
336
  WBF_SKIP_THR = 0.0001
337
 
 
523
 
524
  # Vehicle model β€” download from secondary HF repo with safety guard
525
  t0 = time.monotonic()
526
+ veh_path = None # Path to secondary repo snapshot (also used for plate model)
527
  try:
528
  from huggingface_hub import snapshot_download as _sd
529
  veh_path = Path(_sd(VEHICLE_HF_REPO))
 
545
  self.veh_h = int(veh_shape[2])
546
  self.veh_w = int(veh_shape[3])
547
 
548
+ # Person model β€” CUDA immediately, TRT engine builds in background
549
+ per_onnx = str(path_hf_repo / "person_weights.onnx")
550
  self.per_session = ort.InferenceSession(
551
+ per_onnx,
552
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
553
  )
554
  self.per_input_name = self.per_session.get_inputs()[0].name
555
  per_shape = self.per_session.get_inputs()[0].shape
556
  self.per_h = int(per_shape[2])
557
  self.per_w = int(per_shape[3])
558
+ self._trt_ready = False
559
+ logger.info("[init] Person model: CUDA (TRT build starting in background)")
560
+
561
+ # Launch background TRT engine build
562
+ os.makedirs(TRT_CACHE_PATH, exist_ok=True)
563
+ threading.Thread(
564
+ target=self._build_trt_engine,
565
+ args=(per_onnx,),
566
+ daemon=True,
567
+ name="trt-builder",
568
+ ).start()
569
+
570
+ # Pose model β€” for FP filtering + box refinement
571
+ pose_path = path_hf_repo / "pose_weights.onnx"
572
+ if pose_path.exists():
573
+ self.pose_session = ort.InferenceSession(
574
+ str(pose_path),
575
+ providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
576
+ )
577
+ self.pose_input_name = self.pose_session.get_inputs()[0].name
578
+ pose_shape = self.pose_session.get_inputs()[0].shape
579
+ self.pose_h = int(pose_shape[2])
580
+ self.pose_w = int(pose_shape[3])
581
+ logger.info(f"[init] Pose model loaded: {self.pose_h}x{self.pose_w}")
582
+ else:
583
+ self.pose_session = None
584
+ logger.info("[init] No pose model found, FP filter disabled")
585
+
586
+ # Face detector (SCRFD-500M) β€” confirms person boxes, prevents FP suppression
587
+ face_path = path_hf_repo / "face_weights.onnx"
588
+ if face_path.exists():
589
+ self.face_session = ort.InferenceSession(
590
+ str(face_path),
591
+ providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
592
+ )
593
+ self.face_input_name = self.face_session.get_inputs()[0].name
594
+ logger.info("[init] Face model (SCRFD-500M) loaded")
595
+ else:
596
+ self.face_session = None
597
+ logger.info("[init] No face model found")
598
+
599
+ # License plate detector β€” loaded from secondary HF repo alongside vehicle weights
600
+ plate_path = veh_path / "plate_weights.onnx" if veh_path else None
601
+ if plate_path and plate_path.exists():
602
+ self.plate_session = ort.InferenceSession(
603
+ str(plate_path),
604
+ providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
605
+ )
606
+ self.plate_input_name = self.plate_session.get_inputs()[0].name
607
+ plate_shape = self.plate_session.get_inputs()[0].shape
608
+ self.plate_h = int(plate_shape[2]) if isinstance(plate_shape[2], int) else 640
609
+ self.plate_w = int(plate_shape[3]) if isinstance(plate_shape[3], int) else 640
610
+ logger.info(f"[init] Plate model loaded: {self.plate_h}x{self.plate_w}")
611
+ else:
612
+ self.plate_session = None
613
+ logger.info("[init] No plate model found, plate confirmation disabled")
614
+
615
+ # Pose cache β€” populated by _pose_filter_refine, read by vehicle parts
616
+ self._cached_pose_data = None
617
 
618
  # Thread pool for parallel inference
619
  self._executor = ThreadPoolExecutor(max_workers=2)
 
622
  veh_prov = self.veh_session.get_providers()
623
  per_prov = self.per_session.get_providers()
624
  logger.info(f"Vehicle ORT providers: {veh_prov}")
625
+ logger.info(f"Person ORT providers: {per_prov} (TRT building in background)")
626
  logger.info(f"TTA={ENABLE_TTA} PARALLEL={ENABLE_PARALLEL}")
627
 
628
+ def _build_trt_engine(self, per_onnx):
629
+ """Build TRT FP16 engine in background, swap person session when ready.
630
+
631
+ On fresh nodes: ~18 min to compile. Cached engine loads in <1s.
632
+ During build, inference uses CUDAExecutionProvider (passes RTF at ~78ms).
633
+ After build, atomically swaps to TRT session (~29ms pipeline).
634
+ """
635
+ try:
636
+ trt_opts = {
637
+ "trt_fp16_enable": str(TRT_FP16).lower(),
638
+ "trt_max_workspace_size": str(TRT_WORKSPACE_GB << 30),
639
+ "trt_engine_cache_enable": "true",
640
+ "trt_engine_cache_path": TRT_CACHE_PATH,
641
+ }
642
+ t0 = time.monotonic()
643
+ logger.info("[trt-build] Creating TRT session (may take ~18min on fresh node)...")
644
+ trt_session = ort.InferenceSession(
645
+ per_onnx,
646
+ providers=[
647
+ ("TensorrtExecutionProvider", trt_opts),
648
+ "CUDAExecutionProvider",
649
+ "CPUExecutionProvider",
650
+ ],
651
+ )
652
+
653
+ provs = trt_session.get_providers()
654
+ if "TensorrtExecutionProvider" not in provs:
655
+ logger.warning("[trt-build] TRT provider not active (%s), keeping CUDA", provs)
656
+ return
657
+
658
+ # Run dummy inference to fully materialize the engine
659
+ inp_name = trt_session.get_inputs()[0].name
660
+ inp_shape = trt_session.get_inputs()[0].shape
661
+ dummy = np.zeros((1, 3, int(inp_shape[2]), int(inp_shape[3])), dtype=np.float32)
662
+ trt_session.run(None, {inp_name: dummy})
663
+
664
+ dt = time.monotonic() - t0
665
+ logger.info("[trt-build] TRT engine ready in %.1fs β€” swapping person session", dt)
666
+
667
+ # Atomic swap β€” Python GIL makes single attribute assignment safe.
668
+ # Any in-flight inference holds a reference to the old session, which
669
+ # stays alive until that inference completes.
670
+ self.per_session = trt_session
671
+ self._trt_ready = True
672
+
673
+ logger.info("[trt-build] Person model now using TensorRT FP16")
674
+ except Exception as e:
675
+ logger.warning("[trt-build] TRT build failed (%s), keeping CUDA", e)
676
+
677
  def __repr__(self) -> str:
678
+ trt_status = "TRT" if self._trt_ready else "CUDA (TRT building)"
679
+ return f"Unified Miner v3.15 β€” person={trt_status}, background TRT engine build"
680
 
681
  # ── Vehicle preprocessing (letterbox) ───────────────────────────────────
682
 
 
725
  return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)
726
 
727
  def _infer_vehicle(self, image_bgr):
728
+ """Vehicle detection with flip TTA, per-class confidence, and aspect ratio filtering.
729
+
730
+ Pipeline:
731
+ 1. Original pass at VEH_CONF_THRES
732
+ 2. Flip TTA pass at VEH_TTA_CONF (always enabled)
733
+ 3. Remap classes, per-class NMS
734
+ 4. Per-class confidence filter (higher thresholds reduce FP)
735
+ 5. Per-class aspect ratio filter
736
+ 6. Skip bus (cls_id=4, not scored by validator)
737
+ """
738
  oh, ow = image_bgr.shape[:2]
739
 
740
+ # Always run flip TTA for vehicle β€” compensates for higher per-class thresholds
741
+ boxes1, confs1, cls1 = self._veh_run_pass(image_bgr, VEH_CONF_THRES)
742
+ flipped = cv2.flip(image_bgr, 1)
743
+ boxes2, confs2, cls2 = self._veh_run_pass(flipped, VEH_TTA_CONF)
744
+ if len(boxes2):
745
+ boxes2[:, 0], boxes2[:, 2] = ow - boxes2[:, 2], ow - boxes2[:, 0]
746
+ parts = [(b, s, c) for b, s, c in
747
+ [(boxes1, confs1, cls1), (boxes2, confs2, cls2)] if len(b)]
748
+ if not parts:
749
+ return []
750
+ boxes = np.concatenate([p[0] for p in parts])
751
+ confs = np.concatenate([p[1] for p in parts])
752
+ cls_ids = np.concatenate([p[2] for p in parts])
 
 
753
 
754
  if len(boxes) == 0:
755
  return []
 
764
  if len(boxes) == 0:
765
  return []
766
 
767
+ # Per-class confidence filter + aspect ratio filter + bus suppression
 
 
 
 
 
 
768
  img_area = float(oh * ow)
769
  sane = []
770
  for i in range(len(boxes)):
771
+ cls = int(out_cls[i])
772
+
773
+ # Skip bus entirely (not scored by validator, just generates FP)
774
+ if cls in VEH_SKIP_CLS:
775
+ continue
776
+
777
+ # Per-class confidence threshold
778
+ min_conf = VEH_CLASS_CONF.get(cls, VEH_CONF_THRES)
779
+ if confs[i] < min_conf:
780
+ continue
781
+
782
  bw = boxes[i, 2] - boxes[i, 0]
783
  bh = boxes[i, 3] - boxes[i, 1]
784
+
785
+ # Minimum dimension
786
  if bw < VEH_MIN_WH or bh < VEH_MIN_WH:
787
  continue
788
+
789
  area = bw * bh
790
+
791
+ # Per-class minimum area
792
+ min_area = VEH_CLASS_MIN_AREA.get(cls, VEH_MIN_AREA)
793
+ if area < min_area:
794
  continue
795
+
796
+ # Per-class aspect ratio filter
797
+ aspect = max(bw, bh) / max(min(bw, bh), 1e-6)
798
+ max_aspect = VEH_CLASS_ASPECT.get(cls, VEH_MAX_ASPECT)
799
+ if aspect > max_aspect:
800
  continue
801
+
802
+ # Max area ratio (covers entire image β€” likely FP)
803
  if area / img_area > VEH_MAX_AREA_RATIO:
804
  continue
805
+
806
  sane.append(i)
807
 
808
  if not sane:
 
827
  ))
828
  return out
829
 
830
+ # ── Vehicle parts confirmation ───────────────────────────────────────
831
+
832
+ @staticmethod
833
+ def _veh_check_driver(vb, person_boxes):
834
+ """Check if any person detection overlaps the driver/passenger region.
835
+
836
+ Driver region: upper 55% height, center 70% width of vehicle box.
837
+ A person's center inside this region β†’ vehicle confirmed.
838
+ """
839
+ if not person_boxes:
840
+ return False
841
+ vw = vb.x2 - vb.x1
842
+ vh = vb.y2 - vb.y1
843
+ dr_x1 = vb.x1 + vw * 0.15
844
+ dr_y1 = vb.y1
845
+ dr_x2 = vb.x2 - vw * 0.15
846
+ dr_y2 = vb.y1 + vh * 0.55
847
+ for pb in person_boxes:
848
+ pcx = (pb.x1 + pb.x2) / 2
849
+ pcy = (pb.y1 + pb.y2) / 2
850
+ if dr_x1 <= pcx <= dr_x2 and dr_y1 <= pcy <= dr_y2:
851
+ return True
852
+ return False
853
+
854
+ def _veh_check_rider(self, moto_box, person_boxes):
855
+ """Check if motorcycle has a rider, optionally with forward-lean pose.
856
+
857
+ Returns (has_overlap, has_lean_pose).
858
+ Uses cached pose keypoints from person pipeline to check torso angle.
859
+ Motorcycle riders lean forward (torso > 15Β° from vertical).
860
+ """
861
+ if not person_boxes:
862
+ return False, False
863
+ mw = moto_box.x2 - moto_box.x1
864
+ mh = moto_box.y2 - moto_box.y1
865
+ mx = mw * 0.1
866
+ my = mh * 0.1
867
+ has_overlap = False
868
+ for pb in person_boxes:
869
+ pcx = (pb.x1 + pb.x2) / 2
870
+ pcy = (pb.y1 + pb.y2) / 2
871
+ if (moto_box.x1 - mx <= pcx <= moto_box.x2 + mx and
872
+ moto_box.y1 - my <= pcy <= moto_box.y2 + my):
873
+ has_overlap = True
874
+ break
875
+ if not has_overlap:
876
+ return False, False
877
+
878
+ # Check forward-lean pose using cached pose data
879
+ if self._cached_pose_data is None:
880
+ return True, False
881
+ pose_boxes, pose_kps = self._cached_pose_data
882
+ if len(pose_boxes) == 0:
883
+ return True, False
884
+
885
+ for j in range(len(pose_boxes)):
886
+ pb = pose_boxes[j]
887
+ pcx = (pb[0] + pb[2]) / 2
888
+ pcy = (pb[1] + pb[3]) / 2
889
+ if not (moto_box.x1 - mx <= pcx <= moto_box.x2 + mx and
890
+ moto_box.y1 - my <= pcy <= moto_box.y2 + my):
891
+ continue
892
+ kps = pose_kps[j]
893
+ # Need at least one shoulder + one hip visible
894
+ l_sh, r_sh = kps[5], kps[6]
895
+ l_hip, r_hip = kps[11], kps[12]
896
+ sh_vis = [k[:2] for k in [l_sh, r_sh] if k[2] >= POSE_KP_CONF]
897
+ hip_vis = [k[:2] for k in [l_hip, r_hip] if k[2] >= POSE_KP_CONF]
898
+ if not sh_vis or not hip_vis:
899
+ continue
900
+ sh_mid = np.mean(sh_vis, axis=0)
901
+ hip_mid = np.mean(hip_vis, axis=0)
902
+ dx = sh_mid[0] - hip_mid[0]
903
+ dy = hip_mid[1] - sh_mid[1] # positive = shoulder above hip
904
+ if dy <= 0:
905
+ continue
906
+ angle = math.degrees(math.atan2(abs(dx), dy))
907
+ if angle >= VEH_PARTS_RIDER_LEAN_DEG:
908
+ return True, True
909
+ return True, False
910
+
911
+ def _veh_check_headlights(self, vb, image_bgr):
912
+ """Detect bright symmetric pair in lower portion of vehicle box.
913
+
914
+ Requires two bright blobs at similar y, on opposite sides of center,
915
+ with similar area. Only checks vehicles wider than VEH_PARTS_HL_MIN_PX.
916
+ """
917
+ bw = vb.x2 - vb.x1
918
+ bh = vb.y2 - vb.y1
919
+ if bw < VEH_PARTS_HL_MIN_PX or bh < 30:
920
+ return False
921
+
922
+ oh, ow = image_bgr.shape[:2]
923
+ y1 = max(0, min(oh, int(vb.y1 + bh * 0.65)))
924
+ y2 = max(0, min(oh, int(vb.y2)))
925
+ x1 = max(0, min(ow, int(vb.x1)))
926
+ x2 = max(0, min(ow, int(vb.x2)))
927
+ if y2 - y1 < 5 or x2 - x1 < 10:
928
+ return False
929
+
930
+ roi = image_bgr[y1:y2, x1:x2]
931
+ gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
932
+ _, bright = cv2.threshold(gray, VEH_PARTS_HL_BRIGHT, 255, cv2.THRESH_BINARY)
933
+ contours, _ = cv2.findContours(bright, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
934
+
935
+ blobs = []
936
+ for c in contours:
937
+ area = cv2.contourArea(c)
938
+ if area < VEH_PARTS_HL_MIN_BLOB:
939
+ continue
940
+ M = cv2.moments(c)
941
+ if M["m00"] < 1:
942
+ continue
943
+ blobs.append((M["m10"] / M["m00"], M["m01"] / M["m00"], area))
944
+
945
+ if len(blobs) < 2:
946
+ return False
947
+
948
+ roi_mid = (x2 - x1) / 2.0
949
+ roi_h = y2 - y1
950
+ for i in range(len(blobs)):
951
+ for j in range(i + 1, len(blobs)):
952
+ b1, b2 = blobs[i], blobs[j]
953
+ if abs(b1[1] - b2[1]) > roi_h * 0.4:
954
+ continue
955
+ if max(b1[2], b2[2]) / max(min(b1[2], b2[2]), 1) > 3.0:
956
+ continue
957
+ if (b1[0] - roi_mid) * (b2[0] - roi_mid) < 0:
958
+ return True
959
+ return False
960
+
961
    def _veh_check_windows(self, vb, image_bgr):
        """Detect repeated window pattern (bus/coach signature) using vertical edge periodicity.

        Extracts middle horizontal band, applies vertical Sobel, projects vertically,
        and checks for 3+ regularly-spaced peaks (window frame edges).
        Only for large vehicles (truck cls_id=2).
        """
        bw = vb.x2 - vb.x1
        bh = vb.y2 - vb.y1
        # Too small for a window pattern to be resolvable
        if bw < VEH_PARTS_WINDOW_MIN_PX or bh < 40:
            return False

        oh, ow = image_bgr.shape[:2]
        # Middle 40% of height (window band on a bus/coach)
        y1 = max(0, min(oh, int(vb.y1 + bh * 0.30)))
        y2 = max(0, min(oh, int(vb.y1 + bh * 0.70)))
        x1 = max(0, min(ow, int(vb.x1)))
        x2 = max(0, min(ow, int(vb.x2)))
        if y2 - y1 < 10 or x2 - x1 < 30:
            return False

        roi = image_bgr[y1:y2, x1:x2]
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Vertical edge detection (window frames are vertical edges)
        sobel_v = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        abs_sobel = np.abs(sobel_v)

        # Project vertically: mean per column
        projection = abs_sobel.mean(axis=0)
        if len(projection) < 10:
            return False

        # Smooth projection with a box filter; `| 1` forces an odd width
        ks = max(3, int(len(projection) * 0.02) | 1)
        projection = np.convolve(projection, np.ones(ks) / ks, mode='same')

        # Find peaks above mean + 1 std; each contiguous above-threshold
        # run is collapsed to its midpoint column index
        thresh = projection.mean() + projection.std()
        peaks = []
        in_peak = False
        pk_start = 0
        for i in range(len(projection)):
            if projection[i] > thresh:
                if not in_peak:
                    pk_start = i
                    in_peak = True
            else:
                if in_peak:
                    peaks.append((pk_start + i) // 2)
                    in_peak = False
        # Close a run still open at the right edge of the band
        if in_peak:
            peaks.append((pk_start + len(projection) - 1) // 2)

        if len(peaks) < VEH_PARTS_WINDOW_MIN_PEAKS:
            return False

        # Check regular spacing: gaps within 40% of median
        gaps = [peaks[i + 1] - peaks[i] for i in range(len(peaks) - 1)]
        if not gaps:
            return False
        med = sorted(gaps)[len(gaps) // 2]
        # Median gap under 5 px means the "peaks" are just noise
        if med < 5:
            return False
        regular = sum(1 for g in gaps if abs(g - med) / max(med, 1) < 0.4)
        # At least 60% of the gaps must agree with the median spacing
        return regular >= len(gaps) * 0.6
1027
+
1028
    def _veh_check_plate(self, vb, image_bgr):
        """Run license plate detector on a vehicle crop. Returns True if plate found."""
        if self.plate_session is None:
            return False
        bw = vb.x2 - vb.x1
        # Plate would be too few pixels to detect on a narrow vehicle
        if bw < VEH_PARTS_PLATE_MIN_PX:
            return False

        oh, ow = image_bgr.shape[:2]
        # Crop vehicle region with 5% padding
        pad_x = int(bw * 0.05)
        pad_y = int((vb.y2 - vb.y1) * 0.05)
        cx1 = max(0, int(vb.x1) - pad_x)
        cy1 = max(0, int(vb.y1) - pad_y)
        cx2 = min(ow, int(vb.x2) + pad_x)
        cy2 = min(oh, int(vb.y2) + pad_y)
        crop = image_bgr[cy1:cy2, cx1:cx2]
        if crop.size == 0:
            return False

        # Letterbox to plate model input (gray 114 padding, centered)
        ch, cw = crop.shape[:2]
        r = min(self.plate_h / ch, self.plate_w / cw)
        nw, nh = int(round(cw * r)), int(round(ch * r))
        img_r = cv2.resize(crop, (nw, nh), interpolation=cv2.INTER_LINEAR)
        dw, dh = self.plate_w - nw, self.plate_h - nh
        pl, pt = dw // 2, dh // 2
        img_p = cv2.copyMakeBorder(
            img_r, pt, dh - pt, pl, dw - pl,
            cv2.BORDER_CONSTANT, value=(114, 114, 114),
        )
        # BGR -> RGB, normalize to [0,1], NCHW layout
        rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
        inp = rgb.astype(np.float32) / 255.0
        inp = np.ascontiguousarray(inp.transpose(2, 0, 1)[np.newaxis])

        raw = self.plate_session.run(None, {self.plate_input_name: inp})[0]
        pred = raw[0] if raw.ndim == 3 else raw

        # Handle both [N,6] end2end (post-NMS) and [N, 5+nc] raw formats
        if pred.shape[0] < pred.shape[1]:
            pred = pred.T  # transpose [5+nc, N] -> [N, 5+nc]
        if pred.shape[1] < 5:
            return False
        # End2end post-NMS: few detections (< 500), col4=conf already final
        if pred.shape[0] < 500 and pred.shape[1] == 6:
            confs = pred[:, 4]
        elif pred.shape[1] == 5:
            confs = pred[:, 4]  # single objectness score
        else:
            # Raw: x,y,w,h,objectness,cls_scores... -> conf = obj * max(cls)
            confs = pred[:, 4] * np.max(pred[:, 5:], axis=1)
        # Any detection above the plate confidence threshold confirms
        return bool((confs >= VEH_PARTS_PLATE_CONF).any())
1080
+
1081
    def _vehicle_parts_confirm(self, vehicle_boxes, person_boxes, image_bgr):
        """Parts-based confidence scoring for vehicle detections.

        Scoring hierarchy (confidence boosts are additive):
          1. License plate detected           -> +VEH_PARTS_BOOST_PLATE (strong, never suppress)
          2. Person (driver/rider) in vehicle -> +VEH_PARTS_BOOST_DRIVER / _RIDER
          3. Headlight pair detected          -> +VEH_PARTS_BOOST_HL
          4. Bus window pattern on truck      -> +VEH_PARTS_BOOST_WINDOW
          5. No parts but small/distant or high-conf -> keep original
          6. Large + low-conf + no parts      -> suppress as FP

        Small/distant vehicles (area ratio < VEH_PARTS_SMALL_AREA) are always exempt.
        Bus (cls_id=4) suppressed in _infer_vehicle -- window check applies to trucks.
        """
        if not vehicle_boxes or not VEH_PARTS_ENABLED:
            return vehicle_boxes

        oh, ow = image_bgr.shape[:2]
        img_area = float(oh * ow)
        has_plate_model = self.plate_session is not None
        # Skip plate checks on crowded scenes (aerial/drone, plates invisible)
        skip_plate = len(vehicle_boxes) > 20

        result = []
        # Per-signal counters, reported in the summary log line at the end
        n_driver = 0
        n_rider = 0
        n_rider_lean = 0
        n_headlight = 0
        n_window = 0
        n_plate = 0
        n_suppressed = 0

        for vb in vehicle_boxes:
            bw = vb.x2 - vb.x1
            bh = vb.y2 - vb.y1
            area_ratio = (bw * bh) / img_area

            # Small/distant: exempt from parts check
            if area_ratio < VEH_PARTS_SMALL_AREA:
                result.append(vb)
                continue

            boost = 0.0
            confirmed = False

            # Check 1: License plate (strongest signal)
            if has_plate_model and not skip_plate and bw >= VEH_PARTS_PLATE_MIN_PX:
                try:
                    if self._veh_check_plate(vb, image_bgr):
                        boost += VEH_PARTS_BOOST_PLATE
                        confirmed = True
                        n_plate += 1
                except Exception:
                    # Best-effort: a failed plate check never kills the detection
                    pass

            # Check 2: Driver/passenger inside car or truck
            if vb.cls_id in (1, 2):
                if self._veh_check_driver(vb, person_boxes):
                    boost += VEH_PARTS_BOOST_DRIVER
                    confirmed = True
                    n_driver += 1

            # Check 3: Motorcycle rider (overlap + optional lean pose)
            if vb.cls_id == 3:
                has_overlap, has_lean = self._veh_check_rider(vb, person_boxes)
                if has_overlap:
                    boost += VEH_PARTS_BOOST_RIDER
                    if has_lean:
                        boost += 0.05  # Extra for confirmed lean pose
                        n_rider_lean += 1
                    confirmed = True
                    n_rider += 1

            # Check 4: Headlight pair
            if bw >= VEH_PARTS_HL_MIN_PX:
                try:
                    if self._veh_check_headlights(vb, image_bgr):
                        boost += VEH_PARTS_BOOST_HL
                        confirmed = True
                        n_headlight += 1
                except Exception:
                    pass

            # Check 5: Window pattern (large trucks that might be buses)
            # NOTE: boosts confidence but does NOT set `confirmed` on its own
            if vb.cls_id == 2 and bw >= VEH_PARTS_WINDOW_MIN_PX:
                try:
                    if self._veh_check_windows(vb, image_bgr):
                        boost += VEH_PARTS_BOOST_WINDOW
                        n_window += 1
                except Exception:
                    pass

            # Apply boost and decide
            new_conf = min(1.0, vb.conf + boost)

            if confirmed:
                result.append(BoundingBox(
                    x1=vb.x1, y1=vb.y1, x2=vb.x2, y2=vb.y2,
                    cls_id=vb.cls_id, conf=new_conf,
                ))
            elif area_ratio > VEH_PARTS_FP_AREA:
                # Large vehicle -- use stricter threshold if plate model loaded
                fp_thresh = VEH_PARTS_FP_CONF_STRICT if (has_plate_model and not skip_plate) else VEH_PARTS_FP_CONF
                if vb.conf < fp_thresh:
                    n_suppressed += 1
                else:
                    result.append(vb)
            else:
                result.append(vb)

        if n_driver or n_rider or n_headlight or n_window or n_plate or n_suppressed:
            logger.info(f"[veh-parts] plate={n_plate} driver={n_driver} rider={n_rider}"
                        f"(lean={n_rider_lean}) hl={n_headlight} win={n_window} "
                        f"suppress={n_suppressed}, kept {len(result)}/{len(vehicle_boxes)}")
        return result
1196
+
1197
  # ── Person preprocessing (letterbox) ──────────────────────────────────
1198
 
1199
  def _per_letterbox(self, img):
 
1347
 
1348
  return np.array(keep_b), np.array(keep_s)
1349
 
1350
    # ── Pose FP filter + box refinement ──────────────────────────────────

    def _pose_run(self, image_bgr):
        """Run pose model on full image, return (boxes [N,4], confs [N], keypoints [N,17,3]) in original coords."""
        if self.pose_session is None:
            return np.empty((0, 4)), np.empty(0), np.empty((0, 17, 3))

        oh, ow = image_bgr.shape[:2]

        # Letterbox to pose model input size (gray 114 padding, centered)
        r = min(self.pose_h / oh, self.pose_w / ow)
        nw, nh = int(round(ow * r)), int(round(oh * r))
        img_r = cv2.resize(image_bgr, (nw, nh), interpolation=cv2.INTER_LINEAR)
        dw, dh = self.pose_w - nw, self.pose_h - nh
        pl, pt = dw // 2, dh // 2
        img_p = cv2.copyMakeBorder(
            img_r, pt, dh - pt, pl, dw - pl,
            cv2.BORDER_CONSTANT, value=(114, 114, 114),
        )

        # BGR -> RGB, [0,1] normalize, NCHW
        rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
        inp = rgb.astype(np.float32) / 255.0
        inp = np.ascontiguousarray(inp.transpose(2, 0, 1)[np.newaxis])

        raw = self.pose_session.run(None, {self.pose_input_name: inp})[0]

        # raw shape: [1, 56, 8400] -> transpose to [8400, 56]
        pred = raw[0] if raw.ndim == 3 else raw
        if pred.shape[0] < pred.shape[1]:
            pred = pred.T

        # Decode: cols 0-3=xywh, col 4=conf, cols 5-55=17*3 keypoints
        confs = pred[:, 4]
        keep = confs >= POSE_CONF_THRESH
        if not keep.any():
            return np.empty((0, 4)), np.empty(0), np.empty((0, 17, 3))

        pred = pred[keep]
        confs = pred[:, 4]

        # Convert xywh to x1y1x2y2 in original coords (undo letterbox)
        cx, cy, bw, bh = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
        x1 = np.clip((cx - bw / 2 - pl) / r, 0, ow)
        y1 = np.clip((cy - bh / 2 - pt) / r, 0, oh)
        x2 = np.clip((cx + bw / 2 - pl) / r, 0, ow)
        y2 = np.clip((cy + bh / 2 - pt) / r, 0, oh)
        boxes = np.stack([x1, y1, x2, y2], axis=1)

        # Decode keypoints: [N, 51] -> [N, 17, 3]; undo letterbox for x/y
        kp_raw = pred[:, 5:].reshape(-1, 17, 3).copy()
        kp_raw[:, :, 0] = (kp_raw[:, :, 0] - pl) / r  # x
        kp_raw[:, :, 1] = (kp_raw[:, :, 1] - pt) / r  # y
        kp_raw[:, :, 0] = np.clip(kp_raw[:, :, 0], 0, ow)
        kp_raw[:, :, 1] = np.clip(kp_raw[:, :, 1], 0, oh)

        # NMS on pose detections (greedy, confidence-descending)
        order = np.argsort(-confs)
        boxes = boxes[order]
        confs = confs[order]
        kp_raw = kp_raw[order]

        keep_idx = []
        suppressed = set()
        for i in range(len(boxes)):
            if i in suppressed:
                continue
            keep_idx.append(i)
            for j in range(i + 1, len(boxes)):
                if j in suppressed:
                    continue
                xx1 = max(boxes[i, 0], boxes[j, 0])
                yy1 = max(boxes[i, 1], boxes[j, 1])
                xx2 = min(boxes[i, 2], boxes[j, 2])
                yy2 = min(boxes[i, 3], boxes[j, 3])
                inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
                a1 = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
                a2 = (boxes[j, 2] - boxes[j, 0]) * (boxes[j, 3] - boxes[j, 1])
                iou_val = inter / (a1 + a2 - inter + 1e-9)
                if iou_val >= POSE_NMS_IOU:
                    suppressed.add(j)

        if not keep_idx:
            return np.empty((0, 4)), np.empty(0), np.empty((0, 17, 3))
        keep_idx = np.array(keep_idx)
        return boxes[keep_idx], confs[keep_idx], kp_raw[keep_idx]
1435
+
1436
    # Face-detector (SCRFD, see _face_run) decoding parameters
    _FACE_SIZE = 640             # square input resolution fed to the face model
    _FACE_STRIDES = (8, 16, 32)  # feature-map strides of the three output levels
    _FACE_NUM_ANCHORS = 2        # anchors per feature-map cell
    _FACE_THRESH = 0.5           # minimum face score to keep a candidate
    _FACE_NMS_THRESH = 0.4       # IoU threshold used by the face NMS
1441
+
1442
    def _face_run(self, image_bgr):
        """Run SCRFD-500M face detector. Returns (face_boxes [N,4], face_confs [N])."""
        if self.face_session is None:
            return np.empty((0, 4)), np.empty(0)

        oh, ow = image_bgr.shape[:2]
        sz = self._FACE_SIZE

        # Letterbox resize preserving aspect ratio (top-left aligned)
        scale = min(sz / oh, sz / ow)
        nw, nh = int(round(ow * scale)), int(round(oh * scale))
        resized = cv2.resize(image_bgr, (nw, nh), interpolation=cv2.INTER_LINEAR)
        det_img = np.zeros((sz, sz, 3), dtype=np.uint8)
        det_img[:nh, :nw, :] = resized

        # Preprocess: BGR->RGB, (pixel - 127.5) / 128.0
        blob = cv2.dnn.blobFromImage(
            det_img, 1.0 / 128.0, (sz, sz), (127.5, 127.5, 127.5), swapRB=True,
        )

        outputs = self.face_session.run(None, {self.face_input_name: blob})

        # Decode 3 stride levels: outputs[0:3]=scores, [3:6]=bboxes, [6:9]=kps
        all_scores, all_boxes = [], []
        for idx, stride in enumerate(self._FACE_STRIDES):
            scores = outputs[idx][:, 0]  # (N,)
            bbox_d = outputs[idx + 3]  # (N, 4) distances
            keep = scores >= self._FACE_THRESH
            if not keep.any():
                continue
            scores = scores[keep]
            bbox_d = bbox_d[keep]

            # Generate anchor centers for kept positions; each cell is
            # repeated _FACE_NUM_ANCHORS times to match the output rows
            fh, fw = sz // stride, sz // stride
            grid_y, grid_x = np.mgrid[:fh, :fw]
            centers = np.stack([grid_x, grid_y], axis=-1).astype(np.float32).reshape(-1, 2)
            centers = np.tile(centers, (1, self._FACE_NUM_ANCHORS)).reshape(-1, 2) * stride
            centers = centers[keep]

            # distance -> bbox: [x1, y1, x2, y2], then undo letterbox scale
            x1 = centers[:, 0] - bbox_d[:, 0] * stride
            y1 = centers[:, 1] - bbox_d[:, 1] * stride
            x2 = centers[:, 0] + bbox_d[:, 2] * stride
            y2 = centers[:, 1] + bbox_d[:, 3] * stride
            boxes = np.stack([x1, y1, x2, y2], axis=-1) / scale

            all_scores.append(scores)
            all_boxes.append(boxes)

        if not all_scores:
            return np.empty((0, 4)), np.empty(0)

        scores = np.concatenate(all_scores)
        boxes = np.concatenate(all_boxes)

        # NMS (greedy, vectorized against the remaining candidates)
        order = scores.argsort()[::-1]
        scores, boxes = scores[order], boxes[order]
        keep = []
        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        areas = (x2 - x1) * (y2 - y1)
        suppressed = np.zeros(len(scores), dtype=bool)
        for i in range(len(scores)):
            if suppressed[i]:
                continue
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[i + 1:])
            yy1 = np.maximum(y1[i], y1[i + 1:])
            xx2 = np.minimum(x2[i], x2[i + 1:])
            yy2 = np.minimum(y2[i], y2[i + 1:])
            inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
            ovr = inter / (areas[i] + areas[i + 1:] - inter + 1e-6)
            suppressed[i + 1:] |= ovr > self._FACE_NMS_THRESH

        return boxes[keep], scores[keep]
1518
+
1519
+ @staticmethod
1520
+ def _anatomical_score(kps, kp_conf_thresh=POSE_KP_CONF):
1521
+ """Compute weighted anatomical score from keypoints [17, 3].
1522
+
1523
+ Returns (score, has_head, n_visible):
1524
+ score: weighted sum of visible keypoints (0.0-1.0)
1525
+ has_head: True if any head keypoint (nose/eyes/ears) is visible
1526
+ n_visible: number of visible keypoints
1527
+ """
1528
+ visible = kps[:, 2] >= kp_conf_thresh
1529
+ n_visible = int(visible.sum())
1530
+ score = float((visible.astype(np.float32) * POSE_KP_WEIGHTS).sum())
1531
+ has_head = bool(visible[POSE_HEAD_KP].any())
1532
+ return score, has_head, n_visible
1533
+
1534
+ def _refine_box_with_keypoints(self, pb, kps, ow, oh):
1535
+ """Blend person box with tight keypoint bbox."""
1536
+ visible = kps[:, 2] >= POSE_KP_CONF
1537
+ if not visible.any():
1538
+ return pb
1539
+ vis_kps = kps[visible]
1540
+ kp_x1 = float(vis_kps[:, 0].min())
1541
+ kp_y1 = float(vis_kps[:, 1].min())
1542
+ kp_x2 = float(vis_kps[:, 0].max())
1543
+ kp_y2 = float(vis_kps[:, 1].max())
1544
+
1545
+ # Pad around keypoint bbox
1546
+ kp_w = kp_x2 - kp_x1
1547
+ kp_h = kp_y2 - kp_y1
1548
+ pad_x = kp_w * POSE_KP_PAD
1549
+ pad_y = kp_h * POSE_KP_PAD
1550
+ kp_x1 = max(0, kp_x1 - pad_x)
1551
+ kp_y1 = max(0, kp_y1 - pad_y)
1552
+ kp_x2 = min(ow, kp_x2 + pad_x)
1553
+ kp_y2 = min(oh, kp_y2 + pad_y)
1554
+
1555
+ a = POSE_REFINE_BLEND
1556
+ return BoundingBox(
1557
+ x1=max(0, min(ow, int(pb.x1 * (1 - a) + kp_x1 * a))),
1558
+ y1=max(0, min(oh, int(pb.y1 * (1 - a) + kp_y1 * a))),
1559
+ x2=max(0, min(ow, int(pb.x2 * (1 - a) + kp_x2 * a))),
1560
+ y2=max(0, min(oh, int(pb.y2 * (1 - a) + kp_y2 * a))),
1561
+ cls_id=0,
1562
+ conf=pb.conf,
1563
+ )
1564
+
1565
    def _pose_filter_refine(self, person_boxes, image_bgr):
        """Filter FP detections and refine boxes using anatomical keypoint scoring.

        Anatomical scoring: weighted sum of visible keypoints where head/face
        keypoints (nose, eyes, ears) contribute most, upper body (shoulders,
        elbows, wrists) next, lower body (hips, knees, ankles) least.

        Decision logic:
          1. Run pose model once on full image.
          2. Run face detector (if available) for additional confirmation.
          3. Match each person detection to best-overlapping pose detection.
          4. For matched boxes:
             a. Head keypoints visible OR face detected -> KEEP + refine (never suppress)
             b. Anatomical score >= REFINE threshold -> KEEP + refine
             c. Anatomical score > 0 -> KEEP as-is (partially visible person)
             d. Anatomical score == 0 + large + low-conf -> SUPPRESS (FP candidate)
          5. For unmatched boxes:
             a. Face detected inside box -> KEEP
             b. Large + low-conf -> SUPPRESS
             c. Small or high-conf -> KEEP (SAHI-detected or confident)
        """
        if not person_boxes or self.pose_session is None:
            return person_boxes

        oh, ow = image_bgr.shape[:2]
        img_area = float(oh * ow)

        # Run pose model
        t_pose = time.monotonic()
        pose_boxes, pose_confs, pose_kps = self._pose_run(image_bgr)
        dt_pose = (time.monotonic() - t_pose) * 1000

        # Cache pose data for motorcycle rider check in vehicle parts confirmation
        self._cached_pose_data = (pose_boxes, pose_kps)

        # Run face detector if available
        face_boxes = np.empty((0, 4))
        if self.face_session is not None:
            t_face = time.monotonic()
            face_boxes, _ = self._face_run(image_bgr)
            dt_face = (time.monotonic() - t_face) * 1000
            logger.info(f"[pose] {len(pose_boxes)} pose, {len(face_boxes)} faces "
                        f"in {dt_pose:.0f}+{dt_face:.0f}ms")
        else:
            logger.info(f"[pose] {len(pose_boxes)} pose detections in {dt_pose:.0f}ms")

        # Helper: check if any face detection is inside a person box
        def has_face_inside(pb):
            if len(face_boxes) == 0:
                return False
            for fb in face_boxes:
                # Face center must be inside person box
                fcx = (fb[0] + fb[2]) / 2
                fcy = (fb[1] + fb[3]) / 2
                if pb.x1 <= fcx <= pb.x2 and pb.y1 <= fcy <= pb.y2:
                    return True
            return False

        if len(pose_boxes) == 0:
            # No pose detections -- fall back to face detector or size/conf heuristic
            result = []
            n_suppressed = 0
            for pb in person_boxes:
                if has_face_inside(pb):
                    result.append(pb)
                    continue
                bw = pb.x2 - pb.x1
                bh = pb.y2 - pb.y1
                area_ratio = (bw * bh) / img_area
                if area_ratio > POSE_FP_MIN_AREA and pb.conf < POSE_FP_MAX_CONF:
                    n_suppressed += 1
                    continue
                result.append(pb)
            if n_suppressed:
                logger.info(f"[pose] Suppressed {n_suppressed} FP (no pose detections)")
            return result

        # Match person detections to pose detections via IoU
        result = []
        n_refined = 0
        n_suppressed = 0
        n_face_saved = 0

        for pb in person_boxes:
            pb_arr = np.array([pb.x1, pb.y1, pb.x2, pb.y2], dtype=float)
            best_iou = 0.0
            best_idx = -1

            # Find the pose detection with the highest IoU against this box
            for j in range(len(pose_boxes)):
                xx1 = max(pb_arr[0], pose_boxes[j, 0])
                yy1 = max(pb_arr[1], pose_boxes[j, 1])
                xx2 = min(pb_arr[2], pose_boxes[j, 2])
                yy2 = min(pb_arr[3], pose_boxes[j, 3])
                inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
                a1 = (pb_arr[2] - pb_arr[0]) * (pb_arr[3] - pb_arr[1])
                a2 = (pose_boxes[j, 2] - pose_boxes[j, 0]) * (pose_boxes[j, 3] - pose_boxes[j, 1])
                iou_val = inter / (a1 + a2 - inter + 1e-9)
                if iou_val > best_iou:
                    best_iou = iou_val
                    best_idx = j

            if best_iou >= POSE_MATCH_IOU and best_idx >= 0:
                # Matched to a pose detection -- compute anatomical score
                kps = pose_kps[best_idx]  # [17, 3]
                anat_score, has_head, n_vis = self._anatomical_score(kps)

                if has_head or has_face_inside(pb):
                    # Head/face visible -> definitely a person, refine box
                    result.append(self._refine_box_with_keypoints(pb, kps, ow, oh))
                    n_refined += 1
                elif anat_score >= POSE_ANAT_REFINE_THRESH:
                    # Good anatomical score -> person confirmed, refine
                    result.append(self._refine_box_with_keypoints(pb, kps, ow, oh))
                    n_refined += 1
                elif anat_score > POSE_ANAT_SUPPRESS_THRESH:
                    # Some keypoints visible but low score -- keep as-is
                    result.append(pb)
                else:
                    # Matched to pose bbox but ZERO keypoints visible
                    # Only suppress if also large and low confidence
                    bw = pb.x2 - pb.x1
                    bh = pb.y2 - pb.y1
                    area_ratio = (bw * bh) / img_area
                    if area_ratio > POSE_FP_MIN_AREA and pb.conf < POSE_FP_MAX_CONF:
                        n_suppressed += 1
                        continue
                    result.append(pb)
            else:
                # Not matched to any pose detection
                if has_face_inside(pb):
                    # Face detector confirms a person
                    result.append(pb)
                    n_face_saved += 1
                    continue

                bw = pb.x2 - pb.x1
                bh = pb.y2 - pb.y1
                area_ratio = (bw * bh) / img_area

                if area_ratio > POSE_FP_MIN_AREA and pb.conf < POSE_FP_MAX_CONF:
                    # Large unmatched low-conf box -- likely FP
                    n_suppressed += 1
                    continue
                else:
                    # Small box or high conf -- keep
                    result.append(pb)

        if n_refined or n_suppressed or n_face_saved:
            logger.info(f"[pose] Refined {n_refined}, suppressed {n_suppressed} FP, "
                        f"face-saved {n_face_saved}, "
                        f"kept {len(result)}/{len(person_boxes)}")
        return result
1717
+
1718
+ # ── Person inference with SAHI tiling ────────────────────────────────
1719
+
1720
  def _infer_person(self, image_bgr):
1721
+ """Person detection with SAHI-inspired tiled inference + dynamic NMS.
1722
 
1723
  Pipeline:
1724
+ 1. Full-image pass at native 960px
1725
  2. 2 tiled passes (higher effective resolution for small/distant people)
1726
  3. Flip TTA pass if time budget allows
1727
+ 4. Dynamic NMS merge (adapts IoU threshold to scene density)
1728
  5. Sanity filters
1729
+ 6. Pose FP filter + box refinement
1730
  """
1731
  oh, ow = image_bgr.shape[:2]
1732
  t_start = time.monotonic()
 
1735
  all_boxes = [] # list of [N, 4] arrays
1736
  all_confs = [] # list of [N] arrays
1737
 
1738
+ # Pass 1: full image at native 960px
1739
  boxes_full, confs_full = self._per_run_pass(image_bgr, PER_CONF_LOW)
1740
  if len(boxes_full) > 0:
1741
  all_boxes.append(boxes_full)
 
1768
  if not all_boxes:
1769
  return []
1770
 
1771
+ # Dynamic NMS: adapt IoU threshold to scene density
1772
  merged_b = np.concatenate(all_boxes)
1773
  merged_s = np.concatenate(all_confs)
1774
+ n_raw = len(merged_s)
1775
+ nms_iou = 0.60 if n_raw > 30 else (0.40 if n_raw < 10 else PER_NMS_IOU)
1776
+ merged_b, merged_s = self._nms_max_conf(merged_b, merged_s, nms_iou)
1777
 
1778
  if len(merged_b) == 0:
1779
  return []
 
1802
  cls_id=0,
1803
  conf=max(0.0, min(1.0, float(merged_s[i]))),
1804
  ))
1805
+
1806
+ # Pose FP filter + box refinement (only if time budget allows)
1807
+ if time.monotonic() - t_start < PER_RTF_BUDGET * 0.85:
1808
+ out = self._pose_filter_refine(out, image_bgr)
1809
+
1810
  return out
1811
 
1812
  # ── Unified inference ───────────────────────────────────────────────────
1813
 
1814
  def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
1815
+ self._cached_pose_data = None # reset before each frame
1816
  if ENABLE_PARALLEL:
1817
  veh_future = self._executor.submit(self._infer_vehicle, image_bgr)
1818
  per_future = self._executor.submit(self._infer_person, image_bgr)
 
1821
  else:
1822
  vehicle_boxes = self._infer_vehicle(image_bgr)
1823
  person_boxes = self._infer_person(image_bgr)
1824
+
1825
+ # Vehicle parts confirmation: cross-reference with person detections
1826
+ vehicle_boxes = self._vehicle_parts_confirm(
1827
+ vehicle_boxes, person_boxes, image_bgr)
1828
+
1829
  return vehicle_boxes + person_boxes
1830
 
1831
  # -- Replay buffer -------------------------------------------------------
 
1903
  ).start()
1904
 
1905
  return results
1906
+ # Miner v3.15 β€” background TRT engine build + CUDA-first fallback 20260402
plate_weights.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3bd9b9f88dd75dec77f974e2f3a81f6bbe689e0e5e587b32cc4c8881dad8034
3
+ size 1930779
pose_weights.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be33b5f7c7f04052cff87ac9f3c7a56e6d2055c9524dae3a2dc9229be488afaa
3
+ size 6800452
vehicle_weights.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00bae032cee689d04b3b9131cd80134d03c17972490190f45a5f2aa96f9b703a
3
+ size 21244589