scorevision: push artifact
Browse files- chute_config.yml +12 -2
- face_weights.onnx +3 -0
- miner.py +1075 -49
- plate_weights.onnx +3 -0
- pose_weights.onnx +3 -0
- vehicle_weights.onnx +3 -0
chute_config.yml
CHANGED
|
@@ -3,13 +3,23 @@ Image:
|
|
| 3 |
run_command:
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
- pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'nvidia-cudnn-cu12' 'nvidia-cublas-cu12'
|
| 6 |
-
'
|
| 7 |
-
'
|
|
|
|
|
|
|
| 8 |
NodeSelector:
|
| 9 |
gpu_count: 1
|
| 10 |
min_vram_gb_per_gpu: 16
|
| 11 |
max_hourly_price_per_gpu: 2.0
|
| 12 |
exclude:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
- '5090'
|
| 14 |
- b200
|
| 15 |
- h200
|
|
|
|
| 3 |
run_command:
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
- pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'nvidia-cudnn-cu12' 'nvidia-cublas-cu12'
|
| 6 |
+
'nvidia-cuda-runtime-cu12' 'nvidia-cufft-cu12' 'nvidia-curand-cu12'
|
| 7 |
+
'nvidia-cusolver-cu12' 'nvidia-cusparse-cu12' 'nvidia-nvjitlink-cu12'
|
| 8 |
+
'tensorrt>=10.0' 'opencv-python-headless>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4'
|
| 9 |
+
'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9' 'ensemble-boxes>=1.0' 'torch>=2.6,<3.0'
|
| 10 |
NodeSelector:
|
| 11 |
gpu_count: 1
|
| 12 |
min_vram_gb_per_gpu: 16
|
| 13 |
max_hourly_price_per_gpu: 2.0
|
| 14 |
exclude:
|
| 15 |
+
- a40
|
| 16 |
+
- l4
|
| 17 |
+
- a5000
|
| 18 |
+
- a4000
|
| 19 |
+
- '3090'
|
| 20 |
+
- a4000_ada
|
| 21 |
+
- a10
|
| 22 |
+
- a6000
|
| 23 |
- '5090'
|
| 24 |
- b200
|
| 25 |
- h200
|
face_weights.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a
|
| 3 |
+
size 2524817
|
miner.py
CHANGED
|
@@ -1,20 +1,33 @@
|
|
| 1 |
"""
|
| 2 |
-
Score Vision SN44 β Unified miner v3.
|
| 3 |
-
Dual-model: vehicle (YOLO11m INT8 1280) + person (YOLO26s FP16 960 end2end).
|
|
|
|
| 4 |
Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
|
| 5 |
Person weights loaded from primary HF repo (template downloads automatically).
|
| 6 |
|
| 7 |
Vehicle model (vehicle_weights.onnx):
|
| 8 |
Trained classes: 0=car, 1=bus, 2=truck, 3=motorcycle
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
| 11 |
|
| 12 |
Person model (person_weights.onnx):
|
| 13 |
YOLO26s FP16 960px end2end [1,300,6]. Single class: 0=person.
|
|
|
|
|
|
|
| 14 |
SAHI-style tiling: full + 2 adaptive tiles + flip TTA, max-conf NMS merge.
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
Vehicle eval uses cls_id 1-3. Person eval uses cls_id 0 only.
|
| 19 |
"""
|
| 20 |
|
|
@@ -26,10 +39,13 @@ import logging as _logging
|
|
| 26 |
_cuda_log = _logging.getLogger(__name__)
|
| 27 |
|
| 28 |
def _preload_cuda_libs():
|
| 29 |
-
"""Pre-load CUDA libs from pip
|
| 30 |
try:
|
| 31 |
lib_dirs = []
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
| 33 |
try:
|
| 34 |
mod = __import__(mod_name, fromlist=['__file__'])
|
| 35 |
lib_dir = os.path.join(os.path.dirname(mod.__file__), 'lib')
|
|
@@ -37,6 +53,30 @@ def _preload_cuda_libs():
|
|
| 37 |
lib_dirs.append(lib_dir)
|
| 38 |
except ImportError:
|
| 39 |
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
if not lib_dirs:
|
| 41 |
return
|
| 42 |
existing = os.environ.get('LD_LIBRARY_PATH', '')
|
|
@@ -45,11 +85,10 @@ def _preload_cuda_libs():
|
|
| 45 |
for so in sorted(_glob.glob(os.path.join(lib_dir, 'lib*.so*'))):
|
| 46 |
try:
|
| 47 |
ctypes.CDLL(so, mode=ctypes.RTLD_GLOBAL)
|
| 48 |
-
_cuda_log.info(f'Preloaded CUDA lib: {os.path.basename(so)}')
|
| 49 |
except OSError:
|
| 50 |
pass
|
| 51 |
except Exception as e:
|
| 52 |
-
_cuda_log.warning(f'CUDA preload error: {e}')
|
| 53 |
|
| 54 |
_preload_cuda_libs()
|
| 55 |
|
|
@@ -164,18 +203,72 @@ logger = logging.getLogger(__name__)
|
|
| 164 |
|
| 165 |
# ββ Vehicle config ββββββββββββββοΏ½οΏ½οΏ½βββββββββββββββββββββββββββββββββββββββββββ
|
| 166 |
VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 4, 2: 2, 3: 3} # busβ4 (avoid person cls_id=0 collision)
|
|
|
|
| 167 |
VEH_NUM_CLASSES = 4
|
| 168 |
-
VEH_CONF_THRES = 0.
|
| 169 |
-
VEH_TTA_CONF = 0.
|
| 170 |
VEH_NMS_IOU = 0.50
|
| 171 |
|
| 172 |
-
# ββ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
VEH_MIN_WH = 8
|
| 174 |
-
VEH_MIN_AREA =
|
| 175 |
VEH_MAX_ASPECT = 8.0
|
| 176 |
VEH_MAX_AREA_RATIO = 0.95
|
| 177 |
VEH_MAX_DET = 150
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
# ββ Person config (TTA consensus) βββββββββββββββββββββββββββββββββββββββββββ
|
| 180 |
PER_CONF_LOW = 0.45
|
| 181 |
PER_CONF_HIGH = 0.58
|
|
@@ -194,6 +287,51 @@ PER_TILE_MIN_DIM_RATIO = 1.15 # tile when image dim > model_dim * this (~1104p
|
|
| 194 |
PER_TILE_CONF = 0.40 # lower threshold for tile passes (NMS handles FP)
|
| 195 |
PER_NMS_IOU = 0.50 # NMS IoU for merging across passes (max-conf wins)
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
# ββ Shared ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 198 |
WBF_SKIP_THR = 0.0001
|
| 199 |
|
|
@@ -385,6 +523,7 @@ class Miner:
|
|
| 385 |
|
| 386 |
# Vehicle model β download from secondary HF repo with safety guard
|
| 387 |
t0 = time.monotonic()
|
|
|
|
| 388 |
try:
|
| 389 |
from huggingface_hub import snapshot_download as _sd
|
| 390 |
veh_path = Path(_sd(VEHICLE_HF_REPO))
|
|
@@ -406,15 +545,75 @@ class Miner:
|
|
| 406 |
self.veh_h = int(veh_shape[2])
|
| 407 |
self.veh_w = int(veh_shape[3])
|
| 408 |
|
| 409 |
-
# Person model β
|
|
|
|
| 410 |
self.per_session = ort.InferenceSession(
|
| 411 |
-
|
| 412 |
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
|
| 413 |
)
|
| 414 |
self.per_input_name = self.per_session.get_inputs()[0].name
|
| 415 |
per_shape = self.per_session.get_inputs()[0].shape
|
| 416 |
self.per_h = int(per_shape[2])
|
| 417 |
self.per_w = int(per_shape[3])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
# Thread pool for parallel inference
|
| 420 |
self._executor = ThreadPoolExecutor(max_workers=2)
|
|
@@ -423,11 +622,61 @@ class Miner:
|
|
| 423 |
veh_prov = self.veh_session.get_providers()
|
| 424 |
per_prov = self.per_session.get_providers()
|
| 425 |
logger.info(f"Vehicle ORT providers: {veh_prov}")
|
| 426 |
-
logger.info(f"Person ORT providers: {per_prov}")
|
| 427 |
logger.info(f"TTA={ENABLE_TTA} PARALLEL={ENABLE_PARALLEL}")
|
| 428 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
def __repr__(self) -> str:
|
| 430 |
-
|
|
|
|
| 431 |
|
| 432 |
# ββ Vehicle preprocessing (letterbox) βββββββββββββββββββββββββββββββββββ
|
| 433 |
|
|
@@ -476,23 +725,31 @@ class Miner:
|
|
| 476 |
return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)
|
| 477 |
|
| 478 |
def _infer_vehicle(self, image_bgr):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
oh, ow = image_bgr.shape[:2]
|
| 480 |
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
else:
|
| 495 |
-
boxes, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_CONF_THRES)
|
| 496 |
|
| 497 |
if len(boxes) == 0:
|
| 498 |
return []
|
|
@@ -507,27 +764,45 @@ class Miner:
|
|
| 507 |
if len(boxes) == 0:
|
| 508 |
return []
|
| 509 |
|
| 510 |
-
#
|
| 511 |
-
keep = confs >= VEH_CONF_THRES
|
| 512 |
-
if not keep.any():
|
| 513 |
-
return []
|
| 514 |
-
boxes, confs, out_cls = boxes[keep], confs[keep], out_cls[keep]
|
| 515 |
-
|
| 516 |
-
# Sanity filters
|
| 517 |
img_area = float(oh * ow)
|
| 518 |
sane = []
|
| 519 |
for i in range(len(boxes)):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
bw = boxes[i, 2] - boxes[i, 0]
|
| 521 |
bh = boxes[i, 3] - boxes[i, 1]
|
|
|
|
|
|
|
| 522 |
if bw < VEH_MIN_WH or bh < VEH_MIN_WH:
|
| 523 |
continue
|
|
|
|
| 524 |
area = bw * bh
|
| 525 |
-
|
|
|
|
|
|
|
|
|
|
| 526 |
continue
|
| 527 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
continue
|
|
|
|
|
|
|
| 529 |
if area / img_area > VEH_MAX_AREA_RATIO:
|
| 530 |
continue
|
|
|
|
| 531 |
sane.append(i)
|
| 532 |
|
| 533 |
if not sane:
|
|
@@ -552,6 +827,373 @@ class Miner:
|
|
| 552 |
))
|
| 553 |
return out
|
| 554 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 555 |
# ββ Person preprocessing (letterbox) ββββββββββββββββββββββββββββββββββ
|
| 556 |
|
| 557 |
def _per_letterbox(self, img):
|
|
@@ -705,15 +1347,386 @@ class Miner:
|
|
| 705 |
|
| 706 |
return np.array(keep_b), np.array(keep_s)
|
| 707 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
def _infer_person(self, image_bgr):
|
| 709 |
-
"""Person detection with SAHI-inspired tiled inference.
|
| 710 |
|
| 711 |
Pipeline:
|
| 712 |
-
1. Full-image pass
|
| 713 |
2. 2 tiled passes (higher effective resolution for small/distant people)
|
| 714 |
3. Flip TTA pass if time budget allows
|
| 715 |
-
4.
|
| 716 |
5. Sanity filters
|
|
|
|
| 717 |
"""
|
| 718 |
oh, ow = image_bgr.shape[:2]
|
| 719 |
t_start = time.monotonic()
|
|
@@ -722,7 +1735,7 @@ class Miner:
|
|
| 722 |
all_boxes = [] # list of [N, 4] arrays
|
| 723 |
all_confs = [] # list of [N] arrays
|
| 724 |
|
| 725 |
-
# Pass 1: full image
|
| 726 |
boxes_full, confs_full = self._per_run_pass(image_bgr, PER_CONF_LOW)
|
| 727 |
if len(boxes_full) > 0:
|
| 728 |
all_boxes.append(boxes_full)
|
|
@@ -755,10 +1768,12 @@ class Miner:
|
|
| 755 |
if not all_boxes:
|
| 756 |
return []
|
| 757 |
|
| 758 |
-
#
|
| 759 |
merged_b = np.concatenate(all_boxes)
|
| 760 |
merged_s = np.concatenate(all_confs)
|
| 761 |
-
|
|
|
|
|
|
|
| 762 |
|
| 763 |
if len(merged_b) == 0:
|
| 764 |
return []
|
|
@@ -787,11 +1802,17 @@ class Miner:
|
|
| 787 |
cls_id=0,
|
| 788 |
conf=max(0.0, min(1.0, float(merged_s[i]))),
|
| 789 |
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 790 |
return out
|
| 791 |
|
| 792 |
# ββ Unified inference βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 793 |
|
| 794 |
def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
|
|
|
|
| 795 |
if ENABLE_PARALLEL:
|
| 796 |
veh_future = self._executor.submit(self._infer_vehicle, image_bgr)
|
| 797 |
per_future = self._executor.submit(self._infer_person, image_bgr)
|
|
@@ -800,6 +1821,11 @@ class Miner:
|
|
| 800 |
else:
|
| 801 |
vehicle_boxes = self._infer_vehicle(image_bgr)
|
| 802 |
person_boxes = self._infer_person(image_bgr)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 803 |
return vehicle_boxes + person_boxes
|
| 804 |
|
| 805 |
# -- Replay buffer -------------------------------------------------------
|
|
@@ -877,4 +1903,4 @@ class Miner:
|
|
| 877 |
).start()
|
| 878 |
|
| 879 |
return results
|
| 880 |
-
# Miner v3.
|
|
|
|
| 1 |
"""
|
| 2 |
+
Score Vision SN44 β Unified miner v3.15 (2026-04-02). Background TRT engine build.
|
| 3 |
+
Dual-model: vehicle (YOLO11m INT8 1280, CUDA) + person (YOLO26s FP16 960 end2end, TRT).
|
| 4 |
+
Pose model: YOLOv8n-pose FP16 640 for false-positive filtering + keypoint box refinement.
|
| 5 |
Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
|
| 6 |
Person weights loaded from primary HF repo (template downloads automatically).
|
| 7 |
|
| 8 |
Vehicle model (vehicle_weights.onnx):
|
| 9 |
Trained classes: 0=car, 1=bus, 2=truck, 3=motorcycle
|
| 10 |
+
Output: 1=car, 2=truck, 3=motorcycle. Bus (cls_id=4) SUPPRESSED β not scored by validator.
|
| 11 |
+
Per-class confidence thresholds: car 0.45, truck 0.45, motorcycle 0.35.
|
| 12 |
+
Per-class aspect ratio bounds for FP filtering.
|
| 13 |
+
Flip TTA always enabled β compensates for higher confidence thresholds.
|
| 14 |
|
| 15 |
Person model (person_weights.onnx):
|
| 16 |
YOLO26s FP16 960px end2end [1,300,6]. Single class: 0=person.
|
| 17 |
+
Background TRT build: starts on CUDA immediately, builds TRT FP16 engine in background
|
| 18 |
+
thread (~18min on fresh node), swaps to TRT atomically when ready. Cached thereafter.
|
| 19 |
SAHI-style tiling: full + 2 adaptive tiles + flip TTA, max-conf NMS merge.
|
| 20 |
|
| 21 |
+
Pose model (pose_weights.onnx):
|
| 22 |
+
YOLOv8n-pose FP16 640px [1,56,8400]. 17 COCO keypoints.
|
| 23 |
+
Runs once on full image after person detection.
|
| 24 |
+
Anatomical keypoint scoring: weighted per-keypoint sum (head 0.38, upper 0.32, lower 0.30).
|
| 25 |
+
1. Head keypoints visible β never suppress, always refine box.
|
| 26 |
+
2. Score >= 0.15 β keep + refine. Score > 0 β keep as-is. Score == 0 + large + low-conf β suppress.
|
| 27 |
+
3. Box refinement: blend detected box with tight keypoint bbox for better fit.
|
| 28 |
+
Face detector (optional): if face_session loaded, face inside box β never suppress.
|
| 29 |
+
|
| 30 |
+
Both vehicle + person models run on every image. All detections merged.
|
| 31 |
Vehicle eval uses cls_id 1-3. Person eval uses cls_id 0 only.
|
| 32 |
"""
|
| 33 |
|
|
|
|
| 39 |
_cuda_log = _logging.getLogger(__name__)
|
| 40 |
|
| 41 |
def _preload_cuda_libs():
|
| 42 |
+
"""Pre-load CUDA + TensorRT libs from pip packages so ORT GPU/TRT providers work."""
|
| 43 |
try:
|
| 44 |
lib_dirs = []
|
| 45 |
+
# CUDA libs from nvidia pip packages
|
| 46 |
+
for mod_name in ['nvidia.cudnn', 'nvidia.cublas', 'nvidia.cuda_runtime',
|
| 47 |
+
'nvidia.cufft', 'nvidia.curand', 'nvidia.cusolver',
|
| 48 |
+
'nvidia.cusparse', 'nvidia.nvjitlink']:
|
| 49 |
try:
|
| 50 |
mod = __import__(mod_name, fromlist=['__file__'])
|
| 51 |
lib_dir = os.path.join(os.path.dirname(mod.__file__), 'lib')
|
|
|
|
| 53 |
lib_dirs.append(lib_dir)
|
| 54 |
except ImportError:
|
| 55 |
pass
|
| 56 |
+
|
| 57 |
+
# TensorRT libs β search site-packages for tensorrt_libs directory
|
| 58 |
+
import sys as _sys_inner
|
| 59 |
+
_trt_found = False
|
| 60 |
+
for p in _sys_inner.path:
|
| 61 |
+
candidate = os.path.join(p, 'tensorrt_libs')
|
| 62 |
+
if os.path.isdir(candidate):
|
| 63 |
+
lib_dirs.append(candidate)
|
| 64 |
+
_trt_found = True
|
| 65 |
+
break
|
| 66 |
+
# Broader search if not found in sys.path
|
| 67 |
+
if not _trt_found:
|
| 68 |
+
for base in ['/usr/local/lib', '/usr/lib', os.path.expanduser('~/.local/lib'),
|
| 69 |
+
'/home/miner/.local/lib']:
|
| 70 |
+
for root, dirs, _ in os.walk(base):
|
| 71 |
+
if 'tensorrt_libs' in dirs:
|
| 72 |
+
lib_dirs.append(os.path.join(root, 'tensorrt_libs'))
|
| 73 |
+
_trt_found = True
|
| 74 |
+
break
|
| 75 |
+
if root.count(os.sep) - base.count(os.sep) > 4:
|
| 76 |
+
break
|
| 77 |
+
if _trt_found:
|
| 78 |
+
break
|
| 79 |
+
|
| 80 |
if not lib_dirs:
|
| 81 |
return
|
| 82 |
existing = os.environ.get('LD_LIBRARY_PATH', '')
|
|
|
|
| 85 |
for so in sorted(_glob.glob(os.path.join(lib_dir, 'lib*.so*'))):
|
| 86 |
try:
|
| 87 |
ctypes.CDLL(so, mode=ctypes.RTLD_GLOBAL)
|
|
|
|
| 88 |
except OSError:
|
| 89 |
pass
|
| 90 |
except Exception as e:
|
| 91 |
+
_cuda_log.warning(f'CUDA/TRT preload error: {e}')
|
| 92 |
|
| 93 |
_preload_cuda_libs()
|
| 94 |
|
|
|
|
| 203 |
|
| 204 |
# ββ Vehicle config ββββββββββββββοΏ½οΏ½οΏ½βββββββββββββββββββββββββββββββββββββββββββ
|
| 205 |
VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 4, 2: 2, 3: 3} # busβ4 (avoid person cls_id=0 collision)
|
| 206 |
+
VEH_SKIP_CLS = {4} # Bus: not scored by validator, just generates FP. Skip entirely.
|
| 207 |
VEH_NUM_CLASSES = 4
|
| 208 |
+
VEH_CONF_THRES = 0.30 # Low decode threshold for TTA (final filter is per-class)
|
| 209 |
+
VEH_TTA_CONF = 0.20 # TTA flip pass decode threshold
|
| 210 |
VEH_NMS_IOU = 0.50
|
| 211 |
|
| 212 |
+
# ββ Per-class vehicle confidence thresholds (output cls_id) ββββββββββββββββ
|
| 213 |
+
# Raising from uniform 0.35: reduces FP (avg 4.1 FFPI β target <2.0)
|
| 214 |
+
VEH_CLASS_CONF: dict[int, float] = {
|
| 215 |
+
1: 0.45, # car β most FP-prone class (75% of training data, overconfident)
|
| 216 |
+
2: 0.45, # truck β moderate raise
|
| 217 |
+
3: 0.35, # motorcycle β keep lower (small targets, easy to miss)
|
| 218 |
+
4: 1.0, # bus β effectively suppressed (not scored anyway)
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
# ββ Per-class vehicle aspect ratio bounds (min_ratio, max_ratio) βββββββββββ
|
| 222 |
+
# ratio = max(w,h) / min(w,h). Generous bounds to avoid suppressing valid detections.
|
| 223 |
+
VEH_CLASS_ASPECT: dict[int, float] = {
|
| 224 |
+
1: 5.0, # car β rarely > 5:1 from any angle
|
| 225 |
+
2: 6.0, # truck β can be elongated
|
| 226 |
+
3: 4.5, # motorcycle β compact, rarely very elongated
|
| 227 |
+
4: 8.0, # bus (filtered anyway)
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
# ββ Per-class minimum area (pixels) βββββββββββββββββββββββββββββββββββββββ
|
| 231 |
+
VEH_CLASS_MIN_AREA: dict[int, int] = {
|
| 232 |
+
1: 196, # car β 14x14 min
|
| 233 |
+
2: 256, # truck β 16x16 min (should be at least medium-sized)
|
| 234 |
+
3: 100, # motorcycle β 10x10 min (can be very small in distance)
|
| 235 |
+
4: 400, # bus β 20x20 min
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
# ββ Vehicle box sanity filters (global fallbacks) βββββββββββββββββββββββββ
|
| 239 |
VEH_MIN_WH = 8
|
| 240 |
+
VEH_MIN_AREA = 100
|
| 241 |
VEH_MAX_ASPECT = 8.0
|
| 242 |
VEH_MAX_AREA_RATIO = 0.95
|
| 243 |
VEH_MAX_DET = 150
|
| 244 |
|
| 245 |
+
# ββ Vehicle parts confirmation config ββββββββββββββββββββββββββββββββββββ
|
| 246 |
+
# Cross-validates vehicle detections using person detections, OpenCV analysis,
|
| 247 |
+
# and optional license plate detector. Small/distant vehicles exempt.
|
| 248 |
+
VEH_PARTS_ENABLED = True # Master switch for parts confirmation
|
| 249 |
+
VEH_PARTS_SMALL_AREA = 0.004 # Below this area ratio: exempt from suppression
|
| 250 |
+
VEH_PARTS_FP_CONF = 0.50 # Below this conf + large + unconfirmed β suppress
|
| 251 |
+
VEH_PARTS_FP_CONF_STRICT = 0.55 # Stricter threshold when plate model loaded but no plate
|
| 252 |
+
VEH_PARTS_FP_AREA = 0.03 # Above this area ratio β eligible for FP suppression
|
| 253 |
+
# Confidence boosts for confirmed parts (additive)
|
| 254 |
+
VEH_PARTS_BOOST_DRIVER = 0.08 # Person in driver/passenger region
|
| 255 |
+
VEH_PARTS_BOOST_RIDER = 0.10 # Person on motorcycle (overlap + optional lean)
|
| 256 |
+
VEH_PARTS_BOOST_HL = 0.05 # Headlight pair detected
|
| 257 |
+
VEH_PARTS_BOOST_PLATE = 0.12 # License plate detected (Phase 2)
|
| 258 |
+
VEH_PARTS_BOOST_WINDOW = 0.06 # Bus window pattern on truck
|
| 259 |
+
# Headlight detection thresholds
|
| 260 |
+
VEH_PARTS_HL_MIN_PX = 60 # Min vehicle width (px) for headlight check
|
| 261 |
+
VEH_PARTS_HL_BRIGHT = 200 # Grayscale threshold for bright spots
|
| 262 |
+
VEH_PARTS_HL_MIN_BLOB = 15 # Min contour area for headlight candidate
|
| 263 |
+
# Window pattern detection (bus/coach)
|
| 264 |
+
VEH_PARTS_WINDOW_MIN_PX = 100 # Min vehicle width for window pattern check
|
| 265 |
+
VEH_PARTS_WINDOW_MIN_PEAKS = 3 # Min periodic edge peaks for window confirmation
|
| 266 |
+
# Motorcycle rider pose
|
| 267 |
+
VEH_PARTS_RIDER_LEAN_DEG = 15.0 # Min torso lean from vertical (degrees) for rider pose
|
| 268 |
+
# Plate detection thresholds
|
| 269 |
+
VEH_PARTS_PLATE_MIN_PX = 120 # only check plates on medium+ vehicles # Min vehicle width for plate detection
|
| 270 |
+
VEH_PARTS_PLATE_CONF = 0.35 # Min plate detection confidence
|
| 271 |
+
|
| 272 |
# ββ Person config (TTA consensus) βββββββββββββββββββββββββββββββββββββββββββ
|
| 273 |
PER_CONF_LOW = 0.45
|
| 274 |
PER_CONF_HIGH = 0.58
|
|
|
|
| 287 |
PER_TILE_CONF = 0.40 # lower threshold for tile passes (NMS handles FP)
|
| 288 |
PER_NMS_IOU = 0.50 # NMS IoU for merging across passes (max-conf wins)
|
| 289 |
|
| 290 |
+
# ββ Pose FP filter + box refinement config ββββββββββββββββββββββββββββββββββ
|
| 291 |
+
POSE_CONF_THRESH = 0.25 # Minimum confidence for pose detection
|
| 292 |
+
POSE_NMS_IOU = 0.65 # NMS IoU threshold for pose detections
|
| 293 |
+
POSE_MATCH_IOU = 0.30 # IoU threshold to match pose to person box
|
| 294 |
+
POSE_KP_CONF = 0.3 # Keypoint visibility threshold
|
| 295 |
+
POSE_FP_MAX_CONF = 0.65 # Max conf below which unmatched large boxes are suppressed
|
| 296 |
+
POSE_FP_MIN_AREA = 0.04 # Min area ratio (of image) for FP suppression to apply
|
| 297 |
+
POSE_REFINE_BLEND = 0.25 # Blend factor for keypoint box refinement (0=original, 1=keypoint)
|
| 298 |
+
POSE_KP_PAD = 0.10 # Padding around keypoint tight bbox
|
| 299 |
+
|
| 300 |
+
# ββ Anatomical keypoint scoring βββββββββββββββββββββββββββββββββββββββββββββ
|
| 301 |
+
# COCO keypoints: 0=nose 1=l_eye 2=r_eye 3=l_ear 4=r_ear
|
| 302 |
+
# 5=l_shoulder 6=r_shoulder 7=l_elbow 8=r_elbow 9=l_wrist 10=r_wrist
|
| 303 |
+
# 11=l_hip 12=r_hip 13=l_knee 14=r_knee 15=l_ankle 16=r_ankle
|
| 304 |
+
POSE_HEAD_KP = [0, 1, 2, 3, 4] # nose + eyes + ears
|
| 305 |
+
POSE_UPPER_KP = [5, 6, 7, 8, 9, 10] # shoulders + elbows + wrists
|
| 306 |
+
POSE_LOWER_KP = [11, 12, 13, 14, 15, 16] # hips + knees + ankles
|
| 307 |
+
# Per-keypoint weights (head > upper > lower). Sum of all = 1.0.
|
| 308 |
+
POSE_KP_WEIGHTS = np.array([
|
| 309 |
+
0.12, # 0 nose β strongest single indicator
|
| 310 |
+
0.08, # 1 left_eye
|
| 311 |
+
0.08, # 2 right_eye
|
| 312 |
+
0.05, # 3 left_ear
|
| 313 |
+
0.05, # 4 right_ear
|
| 314 |
+
0.07, # 5 left_shoulder
|
| 315 |
+
0.07, # 6 right_shoulder
|
| 316 |
+
0.05, # 7 left_elbow
|
| 317 |
+
0.05, # 8 right_elbow
|
| 318 |
+
0.04, # 9 left_wrist
|
| 319 |
+
0.04, # 10 right_wrist
|
| 320 |
+
0.05, # 11 left_hip
|
| 321 |
+
0.05, # 12 right_hip
|
| 322 |
+
0.04, # 13 left_knee
|
| 323 |
+
0.04, # 14 right_knee
|
| 324 |
+
0.03, # 15 left_ankle
|
| 325 |
+
0.04, # 16 right_ankle
|
| 326 |
+
], dtype=np.float32) # sums to 1.0
|
| 327 |
+
POSE_ANAT_REFINE_THRESH = 0.15 # Score above which we refine box with keypoints
|
| 328 |
+
POSE_ANAT_SUPPRESS_THRESH = 0.0 # Score at or below which suppression is considered
|
| 329 |
+
|
| 330 |
+
# ββ TensorRT engine cache config ββββββββββββββββββββββββββββββββββββββββββββ
|
| 331 |
+
TRT_CACHE_PATH = "/tmp/trt_engine_cache"
|
| 332 |
+
TRT_FP16 = True
|
| 333 |
+
TRT_WORKSPACE_GB = 4
|
| 334 |
+
|
| 335 |
# ββ Shared ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 336 |
WBF_SKIP_THR = 0.0001
|
| 337 |
|
|
|
|
| 523 |
|
| 524 |
# Vehicle model β download from secondary HF repo with safety guard
|
| 525 |
t0 = time.monotonic()
|
| 526 |
+
veh_path = None # Path to secondary repo snapshot (also used for plate model)
|
| 527 |
try:
|
| 528 |
from huggingface_hub import snapshot_download as _sd
|
| 529 |
veh_path = Path(_sd(VEHICLE_HF_REPO))
|
|
|
|
| 545 |
self.veh_h = int(veh_shape[2])
|
| 546 |
self.veh_w = int(veh_shape[3])
|
| 547 |
|
| 548 |
+
# Person model β CUDA immediately, TRT engine builds in background
|
| 549 |
+
per_onnx = str(path_hf_repo / "person_weights.onnx")
|
| 550 |
self.per_session = ort.InferenceSession(
|
| 551 |
+
per_onnx,
|
| 552 |
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
|
| 553 |
)
|
| 554 |
self.per_input_name = self.per_session.get_inputs()[0].name
|
| 555 |
per_shape = self.per_session.get_inputs()[0].shape
|
| 556 |
self.per_h = int(per_shape[2])
|
| 557 |
self.per_w = int(per_shape[3])
|
| 558 |
+
self._trt_ready = False
|
| 559 |
+
logger.info("[init] Person model: CUDA (TRT build starting in background)")
|
| 560 |
+
|
| 561 |
+
# Launch background TRT engine build
|
| 562 |
+
os.makedirs(TRT_CACHE_PATH, exist_ok=True)
|
| 563 |
+
threading.Thread(
|
| 564 |
+
target=self._build_trt_engine,
|
| 565 |
+
args=(per_onnx,),
|
| 566 |
+
daemon=True,
|
| 567 |
+
name="trt-builder",
|
| 568 |
+
).start()
|
| 569 |
+
|
| 570 |
+
# Pose model β for FP filtering + box refinement
|
| 571 |
+
pose_path = path_hf_repo / "pose_weights.onnx"
|
| 572 |
+
if pose_path.exists():
|
| 573 |
+
self.pose_session = ort.InferenceSession(
|
| 574 |
+
str(pose_path),
|
| 575 |
+
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
|
| 576 |
+
)
|
| 577 |
+
self.pose_input_name = self.pose_session.get_inputs()[0].name
|
| 578 |
+
pose_shape = self.pose_session.get_inputs()[0].shape
|
| 579 |
+
self.pose_h = int(pose_shape[2])
|
| 580 |
+
self.pose_w = int(pose_shape[3])
|
| 581 |
+
logger.info(f"[init] Pose model loaded: {self.pose_h}x{self.pose_w}")
|
| 582 |
+
else:
|
| 583 |
+
self.pose_session = None
|
| 584 |
+
logger.info("[init] No pose model found, FP filter disabled")
|
| 585 |
+
|
| 586 |
+
# Face detector (SCRFD-500M) β confirms person boxes, prevents FP suppression
|
| 587 |
+
face_path = path_hf_repo / "face_weights.onnx"
|
| 588 |
+
if face_path.exists():
|
| 589 |
+
self.face_session = ort.InferenceSession(
|
| 590 |
+
str(face_path),
|
| 591 |
+
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
|
| 592 |
+
)
|
| 593 |
+
self.face_input_name = self.face_session.get_inputs()[0].name
|
| 594 |
+
logger.info("[init] Face model (SCRFD-500M) loaded")
|
| 595 |
+
else:
|
| 596 |
+
self.face_session = None
|
| 597 |
+
logger.info("[init] No face model found")
|
| 598 |
+
|
| 599 |
+
# License plate detector β loaded from secondary HF repo alongside vehicle weights
|
| 600 |
+
plate_path = veh_path / "plate_weights.onnx" if veh_path else None
|
| 601 |
+
if plate_path and plate_path.exists():
|
| 602 |
+
self.plate_session = ort.InferenceSession(
|
| 603 |
+
str(plate_path),
|
| 604 |
+
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
|
| 605 |
+
)
|
| 606 |
+
self.plate_input_name = self.plate_session.get_inputs()[0].name
|
| 607 |
+
plate_shape = self.plate_session.get_inputs()[0].shape
|
| 608 |
+
self.plate_h = int(plate_shape[2]) if isinstance(plate_shape[2], int) else 640
|
| 609 |
+
self.plate_w = int(plate_shape[3]) if isinstance(plate_shape[3], int) else 640
|
| 610 |
+
logger.info(f"[init] Plate model loaded: {self.plate_h}x{self.plate_w}")
|
| 611 |
+
else:
|
| 612 |
+
self.plate_session = None
|
| 613 |
+
logger.info("[init] No plate model found, plate confirmation disabled")
|
| 614 |
+
|
| 615 |
+
# Pose cache β populated by _pose_filter_refine, read by vehicle parts
|
| 616 |
+
self._cached_pose_data = None
|
| 617 |
|
| 618 |
# Thread pool for parallel inference
|
| 619 |
self._executor = ThreadPoolExecutor(max_workers=2)
|
|
|
|
| 622 |
veh_prov = self.veh_session.get_providers()
|
| 623 |
per_prov = self.per_session.get_providers()
|
| 624 |
logger.info(f"Vehicle ORT providers: {veh_prov}")
|
| 625 |
+
logger.info(f"Person ORT providers: {per_prov} (TRT building in background)")
|
| 626 |
logger.info(f"TTA={ENABLE_TTA} PARALLEL={ENABLE_PARALLEL}")
|
| 627 |
|
| 628 |
+
def _build_trt_engine(self, per_onnx):
    """Compile a TensorRT FP16 engine in the background and hot-swap it in.

    A fresh node spends roughly 18 minutes compiling; a cached engine loads
    in under a second.  While the build runs, person inference stays on
    CUDAExecutionProvider (passes RTF at ~78ms); once the engine is ready,
    the session is swapped atomically (~29ms pipeline afterwards).
    """
    try:
        provider_opts = {
            "trt_fp16_enable": str(TRT_FP16).lower(),
            "trt_max_workspace_size": str(TRT_WORKSPACE_GB << 30),
            "trt_engine_cache_enable": "true",
            "trt_engine_cache_path": TRT_CACHE_PATH,
        }
        started = time.monotonic()
        logger.info("[trt-build] Creating TRT session (may take ~18min on fresh node)...")
        candidate = ort.InferenceSession(
            per_onnx,
            providers=[
                ("TensorrtExecutionProvider", provider_opts),
                "CUDAExecutionProvider",
                "CPUExecutionProvider",
            ],
        )

        active = candidate.get_providers()
        if "TensorrtExecutionProvider" not in active:
            logger.warning("[trt-build] TRT provider not active (%s), keeping CUDA", active)
            return

        # One warm-up inference so the engine is fully materialized
        # before the swap makes it live.
        meta = candidate.get_inputs()[0]
        warmup = np.zeros((1, 3, int(meta.shape[2]), int(meta.shape[3])), dtype=np.float32)
        candidate.run(None, {meta.name: warmup})

        elapsed = time.monotonic() - started
        logger.info("[trt-build] TRT engine ready in %.1fs β swapping person session", elapsed)

        # Single attribute assignment is atomic under the GIL; any in-flight
        # inference holds a reference to the old session, which stays alive
        # until that inference completes.
        self.per_session = candidate
        self._trt_ready = True

        logger.info("[trt-build] Person model now using TensorRT FP16")
    except Exception as e:
        logger.warning("[trt-build] TRT build failed (%s), keeping CUDA", e)
def __repr__(self) -> str:
    """Human-readable summary of which person-model backend is live."""
    backend = "TRT" if self._trt_ready else "CUDA (TRT building)"
    return f"Unified Miner v3.15 β person={backend}, background TRT engine build"
| 681 |
# ββ Vehicle preprocessing (letterbox) βββββββββββββββββββββββββββββββββββ
|
| 682 |
|
|
|
|
| 725 |
return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)
|
| 726 |
|
| 727 |
def _infer_vehicle(self, image_bgr):
|
| 728 |
+
"""Vehicle detection with flip TTA, per-class confidence, and aspect ratio filtering.
|
| 729 |
+
|
| 730 |
+
Pipeline:
|
| 731 |
+
1. Original pass at VEH_CONF_THRES
|
| 732 |
+
2. Flip TTA pass at VEH_TTA_CONF (always enabled)
|
| 733 |
+
3. Remap classes, per-class NMS
|
| 734 |
+
4. Per-class confidence filter (higher thresholds reduce FP)
|
| 735 |
+
5. Per-class aspect ratio filter
|
| 736 |
+
6. Skip bus (cls_id=4, not scored by validator)
|
| 737 |
+
"""
|
| 738 |
oh, ow = image_bgr.shape[:2]
|
| 739 |
|
| 740 |
+
# Always run flip TTA for vehicle β compensates for higher per-class thresholds
|
| 741 |
+
boxes1, confs1, cls1 = self._veh_run_pass(image_bgr, VEH_CONF_THRES)
|
| 742 |
+
flipped = cv2.flip(image_bgr, 1)
|
| 743 |
+
boxes2, confs2, cls2 = self._veh_run_pass(flipped, VEH_TTA_CONF)
|
| 744 |
+
if len(boxes2):
|
| 745 |
+
boxes2[:, 0], boxes2[:, 2] = ow - boxes2[:, 2], ow - boxes2[:, 0]
|
| 746 |
+
parts = [(b, s, c) for b, s, c in
|
| 747 |
+
[(boxes1, confs1, cls1), (boxes2, confs2, cls2)] if len(b)]
|
| 748 |
+
if not parts:
|
| 749 |
+
return []
|
| 750 |
+
boxes = np.concatenate([p[0] for p in parts])
|
| 751 |
+
confs = np.concatenate([p[1] for p in parts])
|
| 752 |
+
cls_ids = np.concatenate([p[2] for p in parts])
|
|
|
|
|
|
|
| 753 |
|
| 754 |
if len(boxes) == 0:
|
| 755 |
return []
|
|
|
|
| 764 |
if len(boxes) == 0:
|
| 765 |
return []
|
| 766 |
|
| 767 |
+
# Per-class confidence filter + aspect ratio filter + bus suppression
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 768 |
img_area = float(oh * ow)
|
| 769 |
sane = []
|
| 770 |
for i in range(len(boxes)):
|
| 771 |
+
cls = int(out_cls[i])
|
| 772 |
+
|
| 773 |
+
# Skip bus entirely (not scored by validator, just generates FP)
|
| 774 |
+
if cls in VEH_SKIP_CLS:
|
| 775 |
+
continue
|
| 776 |
+
|
| 777 |
+
# Per-class confidence threshold
|
| 778 |
+
min_conf = VEH_CLASS_CONF.get(cls, VEH_CONF_THRES)
|
| 779 |
+
if confs[i] < min_conf:
|
| 780 |
+
continue
|
| 781 |
+
|
| 782 |
bw = boxes[i, 2] - boxes[i, 0]
|
| 783 |
bh = boxes[i, 3] - boxes[i, 1]
|
| 784 |
+
|
| 785 |
+
# Minimum dimension
|
| 786 |
if bw < VEH_MIN_WH or bh < VEH_MIN_WH:
|
| 787 |
continue
|
| 788 |
+
|
| 789 |
area = bw * bh
|
| 790 |
+
|
| 791 |
+
# Per-class minimum area
|
| 792 |
+
min_area = VEH_CLASS_MIN_AREA.get(cls, VEH_MIN_AREA)
|
| 793 |
+
if area < min_area:
|
| 794 |
continue
|
| 795 |
+
|
| 796 |
+
# Per-class aspect ratio filter
|
| 797 |
+
aspect = max(bw, bh) / max(min(bw, bh), 1e-6)
|
| 798 |
+
max_aspect = VEH_CLASS_ASPECT.get(cls, VEH_MAX_ASPECT)
|
| 799 |
+
if aspect > max_aspect:
|
| 800 |
continue
|
| 801 |
+
|
| 802 |
+
# Max area ratio (covers entire image β likely FP)
|
| 803 |
if area / img_area > VEH_MAX_AREA_RATIO:
|
| 804 |
continue
|
| 805 |
+
|
| 806 |
sane.append(i)
|
| 807 |
|
| 808 |
if not sane:
|
|
|
|
| 827 |
))
|
| 828 |
return out
|
| 829 |
|
| 830 |
+
# ββ Vehicle parts confirmation βββββββββββββββββββββββββββββββββββββββ
|
| 831 |
+
|
| 832 |
+
@staticmethod
|
| 833 |
+
def _veh_check_driver(vb, person_boxes):
|
| 834 |
+
"""Check if any person detection overlaps the driver/passenger region.
|
| 835 |
+
|
| 836 |
+
Driver region: upper 55% height, center 70% width of vehicle box.
|
| 837 |
+
A person's center inside this region β vehicle confirmed.
|
| 838 |
+
"""
|
| 839 |
+
if not person_boxes:
|
| 840 |
+
return False
|
| 841 |
+
vw = vb.x2 - vb.x1
|
| 842 |
+
vh = vb.y2 - vb.y1
|
| 843 |
+
dr_x1 = vb.x1 + vw * 0.15
|
| 844 |
+
dr_y1 = vb.y1
|
| 845 |
+
dr_x2 = vb.x2 - vw * 0.15
|
| 846 |
+
dr_y2 = vb.y1 + vh * 0.55
|
| 847 |
+
for pb in person_boxes:
|
| 848 |
+
pcx = (pb.x1 + pb.x2) / 2
|
| 849 |
+
pcy = (pb.y1 + pb.y2) / 2
|
| 850 |
+
if dr_x1 <= pcx <= dr_x2 and dr_y1 <= pcy <= dr_y2:
|
| 851 |
+
return True
|
| 852 |
+
return False
|
| 853 |
+
|
| 854 |
+
def _veh_check_rider(self, moto_box, person_boxes):
|
| 855 |
+
"""Check if motorcycle has a rider, optionally with forward-lean pose.
|
| 856 |
+
|
| 857 |
+
Returns (has_overlap, has_lean_pose).
|
| 858 |
+
Uses cached pose keypoints from person pipeline to check torso angle.
|
| 859 |
+
Motorcycle riders lean forward (torso > 15Β° from vertical).
|
| 860 |
+
"""
|
| 861 |
+
if not person_boxes:
|
| 862 |
+
return False, False
|
| 863 |
+
mw = moto_box.x2 - moto_box.x1
|
| 864 |
+
mh = moto_box.y2 - moto_box.y1
|
| 865 |
+
mx = mw * 0.1
|
| 866 |
+
my = mh * 0.1
|
| 867 |
+
has_overlap = False
|
| 868 |
+
for pb in person_boxes:
|
| 869 |
+
pcx = (pb.x1 + pb.x2) / 2
|
| 870 |
+
pcy = (pb.y1 + pb.y2) / 2
|
| 871 |
+
if (moto_box.x1 - mx <= pcx <= moto_box.x2 + mx and
|
| 872 |
+
moto_box.y1 - my <= pcy <= moto_box.y2 + my):
|
| 873 |
+
has_overlap = True
|
| 874 |
+
break
|
| 875 |
+
if not has_overlap:
|
| 876 |
+
return False, False
|
| 877 |
+
|
| 878 |
+
# Check forward-lean pose using cached pose data
|
| 879 |
+
if self._cached_pose_data is None:
|
| 880 |
+
return True, False
|
| 881 |
+
pose_boxes, pose_kps = self._cached_pose_data
|
| 882 |
+
if len(pose_boxes) == 0:
|
| 883 |
+
return True, False
|
| 884 |
+
|
| 885 |
+
for j in range(len(pose_boxes)):
|
| 886 |
+
pb = pose_boxes[j]
|
| 887 |
+
pcx = (pb[0] + pb[2]) / 2
|
| 888 |
+
pcy = (pb[1] + pb[3]) / 2
|
| 889 |
+
if not (moto_box.x1 - mx <= pcx <= moto_box.x2 + mx and
|
| 890 |
+
moto_box.y1 - my <= pcy <= moto_box.y2 + my):
|
| 891 |
+
continue
|
| 892 |
+
kps = pose_kps[j]
|
| 893 |
+
# Need at least one shoulder + one hip visible
|
| 894 |
+
l_sh, r_sh = kps[5], kps[6]
|
| 895 |
+
l_hip, r_hip = kps[11], kps[12]
|
| 896 |
+
sh_vis = [k[:2] for k in [l_sh, r_sh] if k[2] >= POSE_KP_CONF]
|
| 897 |
+
hip_vis = [k[:2] for k in [l_hip, r_hip] if k[2] >= POSE_KP_CONF]
|
| 898 |
+
if not sh_vis or not hip_vis:
|
| 899 |
+
continue
|
| 900 |
+
sh_mid = np.mean(sh_vis, axis=0)
|
| 901 |
+
hip_mid = np.mean(hip_vis, axis=0)
|
| 902 |
+
dx = sh_mid[0] - hip_mid[0]
|
| 903 |
+
dy = hip_mid[1] - sh_mid[1] # positive = shoulder above hip
|
| 904 |
+
if dy <= 0:
|
| 905 |
+
continue
|
| 906 |
+
angle = math.degrees(math.atan2(abs(dx), dy))
|
| 907 |
+
if angle >= VEH_PARTS_RIDER_LEAN_DEG:
|
| 908 |
+
return True, True
|
| 909 |
+
return True, False
|
| 910 |
+
|
| 911 |
+
def _veh_check_headlights(self, vb, image_bgr):
    """Look for a bright, roughly symmetric blob pair low in the vehicle box.

    A headlight pair shows up as two bright blobs at similar height, on
    opposite sides of the box midline, with comparable areas.  Vehicles
    narrower than VEH_PARTS_HL_MIN_PX are skipped outright.
    """
    box_w = vb.x2 - vb.x1
    box_h = vb.y2 - vb.y1
    if box_w < VEH_PARTS_HL_MIN_PX or box_h < 30:
        return False

    img_h, img_w = image_bgr.shape[:2]
    # Lower 35% of the vehicle box, clipped to the image.
    top = max(0, min(img_h, int(vb.y1 + box_h * 0.65)))
    bot = max(0, min(img_h, int(vb.y2)))
    lft = max(0, min(img_w, int(vb.x1)))
    rgt = max(0, min(img_w, int(vb.x2)))
    if bot - top < 5 or rgt - lft < 10:
        return False

    gray = cv2.cvtColor(image_bgr[top:bot, lft:rgt], cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, VEH_PARTS_HL_BRIGHT, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Collect centroids and areas of sufficiently large bright blobs.
    blobs = []
    for contour in contours:
        blob_area = cv2.contourArea(contour)
        if blob_area < VEH_PARTS_HL_MIN_BLOB:
            continue
        m = cv2.moments(contour)
        if m["m00"] < 1:
            continue
        blobs.append((m["m10"] / m["m00"], m["m01"] / m["m00"], blob_area))

    if len(blobs) < 2:
        return False

    mid_x = (rgt - lft) / 2.0
    band_h = bot - top
    for a in range(len(blobs)):
        for b in range(a + 1, len(blobs)):
            first, second = blobs[a], blobs[b]
            if abs(first[1] - second[1]) > band_h * 0.4:
                continue  # not at a similar height
            if max(first[2], second[2]) / max(min(first[2], second[2]), 1) > 3.0:
                continue  # areas too dissimilar
            if (first[0] - mid_x) * (second[0] - mid_x) < 0:
                return True  # opposite sides of the midline
    return False
def _veh_check_windows(self, vb, image_bgr):
    """Detect a bus/coach-style repeated window pattern on a large vehicle.

    Takes the middle horizontal band of the box, applies a vertical Sobel,
    projects the edge energy onto the x-axis, and requires at least
    VEH_PARTS_WINDOW_MIN_PEAKS peaks with mostly regular spacing (the
    vertical window-frame edges).  The caller applies this to trucks
    (cls_id=2) only.
    """
    box_w = vb.x2 - vb.x1
    box_h = vb.y2 - vb.y1
    if box_w < VEH_PARTS_WINDOW_MIN_PX or box_h < 40:
        return False

    img_h, img_w = image_bgr.shape[:2]
    # Window band: middle 40% of the vehicle's height.
    top = max(0, min(img_h, int(vb.y1 + box_h * 0.30)))
    bot = max(0, min(img_h, int(vb.y1 + box_h * 0.70)))
    lft = max(0, min(img_w, int(vb.x1)))
    rgt = max(0, min(img_w, int(vb.x2)))
    if bot - top < 10 or rgt - lft < 30:
        return False

    gray = cv2.cvtColor(image_bgr[top:bot, lft:rgt], cv2.COLOR_BGR2GRAY)

    # Window frames appear as vertical edges.
    edges = np.abs(cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3))

    # Column-wise mean edge energy.
    profile = edges.mean(axis=0)
    if len(profile) < 10:
        return False

    # Box-filter smoothing with an odd kernel ~2% of the band width.
    win = max(3, int(len(profile) * 0.02) | 1)
    profile = np.convolve(profile, np.ones(win) / win, mode='same')

    # A peak is a contiguous run above mean + 1 std; record its midpoint.
    cutoff = profile.mean() + profile.std()
    peaks = []
    run_start = None
    for pos in range(len(profile)):
        if profile[pos] > cutoff:
            if run_start is None:
                run_start = pos
        elif run_start is not None:
            peaks.append((run_start + pos) // 2)
            run_start = None
    if run_start is not None:
        peaks.append((run_start + len(profile) - 1) // 2)

    if len(peaks) < VEH_PARTS_WINDOW_MIN_PEAKS:
        return False

    # Regular spacing: most gaps within 40% of the median gap.
    gaps = [b - a for a, b in zip(peaks, peaks[1:])]
    if not gaps:
        return False
    median_gap = sorted(gaps)[len(gaps) // 2]
    if median_gap < 5:
        return False
    regular = sum(1 for g in gaps if abs(g - median_gap) / max(median_gap, 1) < 0.4)
    return regular >= len(gaps) * 0.6
def _veh_check_plate(self, vb, image_bgr):
|
| 1029 |
+
"""Run license plate detector on a vehicle crop. Returns True if plate found."""
|
| 1030 |
+
if self.plate_session is None:
|
| 1031 |
+
return False
|
| 1032 |
+
bw = vb.x2 - vb.x1
|
| 1033 |
+
if bw < VEH_PARTS_PLATE_MIN_PX:
|
| 1034 |
+
return False
|
| 1035 |
+
|
| 1036 |
+
oh, ow = image_bgr.shape[:2]
|
| 1037 |
+
# Crop vehicle region with 5% padding
|
| 1038 |
+
pad_x = int(bw * 0.05)
|
| 1039 |
+
pad_y = int((vb.y2 - vb.y1) * 0.05)
|
| 1040 |
+
cx1 = max(0, int(vb.x1) - pad_x)
|
| 1041 |
+
cy1 = max(0, int(vb.y1) - pad_y)
|
| 1042 |
+
cx2 = min(ow, int(vb.x2) + pad_x)
|
| 1043 |
+
cy2 = min(oh, int(vb.y2) + pad_y)
|
| 1044 |
+
crop = image_bgr[cy1:cy2, cx1:cx2]
|
| 1045 |
+
if crop.size == 0:
|
| 1046 |
+
return False
|
| 1047 |
+
|
| 1048 |
+
# Letterbox to plate model input
|
| 1049 |
+
ch, cw = crop.shape[:2]
|
| 1050 |
+
r = min(self.plate_h / ch, self.plate_w / cw)
|
| 1051 |
+
nw, nh = int(round(cw * r)), int(round(ch * r))
|
| 1052 |
+
img_r = cv2.resize(crop, (nw, nh), interpolation=cv2.INTER_LINEAR)
|
| 1053 |
+
dw, dh = self.plate_w - nw, self.plate_h - nh
|
| 1054 |
+
pl, pt = dw // 2, dh // 2
|
| 1055 |
+
img_p = cv2.copyMakeBorder(
|
| 1056 |
+
img_r, pt, dh - pt, pl, dw - pl,
|
| 1057 |
+
cv2.BORDER_CONSTANT, value=(114, 114, 114),
|
| 1058 |
+
)
|
| 1059 |
+
rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
|
| 1060 |
+
inp = rgb.astype(np.float32) / 255.0
|
| 1061 |
+
inp = np.ascontiguousarray(inp.transpose(2, 0, 1)[np.newaxis])
|
| 1062 |
+
|
| 1063 |
+
raw = self.plate_session.run(None, {self.plate_input_name: inp})[0]
|
| 1064 |
+
pred = raw[0] if raw.ndim == 3 else raw
|
| 1065 |
+
|
| 1066 |
+
# Handle both [N,6] end2end (post-NMS) and [N, 5+nc] raw formats
|
| 1067 |
+
if pred.shape[0] < pred.shape[1]:
|
| 1068 |
+
pred = pred.T # transpose [5+nc, N] -> [N, 5+nc]
|
| 1069 |
+
if pred.shape[1] < 5:
|
| 1070 |
+
return False
|
| 1071 |
+
# End2end post-NMS: few detections (< 500), col4=conf already final
|
| 1072 |
+
if pred.shape[0] < 500 and pred.shape[1] == 6:
|
| 1073 |
+
confs = pred[:, 4]
|
| 1074 |
+
elif pred.shape[1] == 5:
|
| 1075 |
+
confs = pred[:, 4] # single objectness score
|
| 1076 |
+
else:
|
| 1077 |
+
# Raw: x,y,w,h,objectness,cls_scores... β conf = obj * max(cls)
|
| 1078 |
+
confs = pred[:, 4] * np.max(pred[:, 5:], axis=1)
|
| 1079 |
+
return bool((confs >= VEH_PARTS_PLATE_CONF).any())
|
| 1080 |
+
|
| 1081 |
+
def _vehicle_parts_confirm(self, vehicle_boxes, person_boxes, image_bgr):
|
| 1082 |
+
"""Parts-based confidence scoring for vehicle detections.
|
| 1083 |
+
|
| 1084 |
+
Scoring hierarchy (confidence boosts are additive):
|
| 1085 |
+
1. License plate detected β +0.12 (strong, never suppress)
|
| 1086 |
+
2. Person (driver/rider) inside vehicle β +0.08-0.10
|
| 1087 |
+
3. Headlight pair detected β +0.05
|
| 1088 |
+
4. Bus window pattern on truck β +0.06
|
| 1089 |
+
5. No parts but small/distant or high-conf β keep original
|
| 1090 |
+
6. Large + low-conf + no parts β suppress as FP
|
| 1091 |
+
|
| 1092 |
+
Small/distant vehicles (area < 0.4% of image) are always exempt.
|
| 1093 |
+
Bus (cls_id=4) suppressed in _infer_vehicle β window check applies to trucks.
|
| 1094 |
+
"""
|
| 1095 |
+
if not vehicle_boxes or not VEH_PARTS_ENABLED:
|
| 1096 |
+
return vehicle_boxes
|
| 1097 |
+
|
| 1098 |
+
oh, ow = image_bgr.shape[:2]
|
| 1099 |
+
img_area = float(oh * ow)
|
| 1100 |
+
has_plate_model = self.plate_session is not None
|
| 1101 |
+
# Skip plate checks on crowded scenes (aerial/drone, plates invisible)
|
| 1102 |
+
skip_plate = len(vehicle_boxes) > 20
|
| 1103 |
+
|
| 1104 |
+
result = []
|
| 1105 |
+
n_driver = 0
|
| 1106 |
+
n_rider = 0
|
| 1107 |
+
n_rider_lean = 0
|
| 1108 |
+
n_headlight = 0
|
| 1109 |
+
n_window = 0
|
| 1110 |
+
n_plate = 0
|
| 1111 |
+
n_suppressed = 0
|
| 1112 |
+
|
| 1113 |
+
for vb in vehicle_boxes:
|
| 1114 |
+
bw = vb.x2 - vb.x1
|
| 1115 |
+
bh = vb.y2 - vb.y1
|
| 1116 |
+
area_ratio = (bw * bh) / img_area
|
| 1117 |
+
|
| 1118 |
+
# Small/distant: exempt from parts check
|
| 1119 |
+
if area_ratio < VEH_PARTS_SMALL_AREA:
|
| 1120 |
+
result.append(vb)
|
| 1121 |
+
continue
|
| 1122 |
+
|
| 1123 |
+
boost = 0.0
|
| 1124 |
+
confirmed = False
|
| 1125 |
+
|
| 1126 |
+
# Check 1: License plate (strongest signal)
|
| 1127 |
+
if has_plate_model and not skip_plate and bw >= VEH_PARTS_PLATE_MIN_PX:
|
| 1128 |
+
try:
|
| 1129 |
+
if self._veh_check_plate(vb, image_bgr):
|
| 1130 |
+
boost += VEH_PARTS_BOOST_PLATE
|
| 1131 |
+
confirmed = True
|
| 1132 |
+
n_plate += 1
|
| 1133 |
+
except Exception:
|
| 1134 |
+
pass
|
| 1135 |
+
|
| 1136 |
+
# Check 2: Driver/passenger inside car or truck
|
| 1137 |
+
if vb.cls_id in (1, 2):
|
| 1138 |
+
if self._veh_check_driver(vb, person_boxes):
|
| 1139 |
+
boost += VEH_PARTS_BOOST_DRIVER
|
| 1140 |
+
confirmed = True
|
| 1141 |
+
n_driver += 1
|
| 1142 |
+
|
| 1143 |
+
# Check 3: Motorcycle rider (overlap + optional lean pose)
|
| 1144 |
+
if vb.cls_id == 3:
|
| 1145 |
+
has_overlap, has_lean = self._veh_check_rider(vb, person_boxes)
|
| 1146 |
+
if has_overlap:
|
| 1147 |
+
boost += VEH_PARTS_BOOST_RIDER
|
| 1148 |
+
if has_lean:
|
| 1149 |
+
boost += 0.05 # Extra for confirmed lean pose
|
| 1150 |
+
n_rider_lean += 1
|
| 1151 |
+
confirmed = True
|
| 1152 |
+
n_rider += 1
|
| 1153 |
+
|
| 1154 |
+
# Check 4: Headlight pair
|
| 1155 |
+
if bw >= VEH_PARTS_HL_MIN_PX:
|
| 1156 |
+
try:
|
| 1157 |
+
if self._veh_check_headlights(vb, image_bgr):
|
| 1158 |
+
boost += VEH_PARTS_BOOST_HL
|
| 1159 |
+
confirmed = True
|
| 1160 |
+
n_headlight += 1
|
| 1161 |
+
except Exception:
|
| 1162 |
+
pass
|
| 1163 |
+
|
| 1164 |
+
# Check 5: Window pattern (large trucks that might be buses)
|
| 1165 |
+
if vb.cls_id == 2 and bw >= VEH_PARTS_WINDOW_MIN_PX:
|
| 1166 |
+
try:
|
| 1167 |
+
if self._veh_check_windows(vb, image_bgr):
|
| 1168 |
+
boost += VEH_PARTS_BOOST_WINDOW
|
| 1169 |
+
n_window += 1
|
| 1170 |
+
except Exception:
|
| 1171 |
+
pass
|
| 1172 |
+
|
| 1173 |
+
# Apply boost and decide
|
| 1174 |
+
new_conf = min(1.0, vb.conf + boost)
|
| 1175 |
+
|
| 1176 |
+
if confirmed:
|
| 1177 |
+
result.append(BoundingBox(
|
| 1178 |
+
x1=vb.x1, y1=vb.y1, x2=vb.x2, y2=vb.y2,
|
| 1179 |
+
cls_id=vb.cls_id, conf=new_conf,
|
| 1180 |
+
))
|
| 1181 |
+
elif area_ratio > VEH_PARTS_FP_AREA:
|
| 1182 |
+
# Large vehicle β use stricter threshold if plate model loaded
|
| 1183 |
+
fp_thresh = VEH_PARTS_FP_CONF_STRICT if (has_plate_model and not skip_plate) else VEH_PARTS_FP_CONF
|
| 1184 |
+
if vb.conf < fp_thresh:
|
| 1185 |
+
n_suppressed += 1
|
| 1186 |
+
else:
|
| 1187 |
+
result.append(vb)
|
| 1188 |
+
else:
|
| 1189 |
+
result.append(vb)
|
| 1190 |
+
|
| 1191 |
+
if n_driver or n_rider or n_headlight or n_window or n_plate or n_suppressed:
|
| 1192 |
+
logger.info(f"[veh-parts] plate={n_plate} driver={n_driver} rider={n_rider}"
|
| 1193 |
+
f"(lean={n_rider_lean}) hl={n_headlight} win={n_window} "
|
| 1194 |
+
f"suppress={n_suppressed}, kept {len(result)}/{len(vehicle_boxes)}")
|
| 1195 |
+
return result
|
| 1196 |
+
|
| 1197 |
# ββ Person preprocessing (letterbox) ββββββββββββββββββββββββββββββββββ
|
| 1198 |
|
| 1199 |
def _per_letterbox(self, img):
|
|
|
|
| 1347 |
|
| 1348 |
return np.array(keep_b), np.array(keep_s)
|
| 1349 |
|
| 1350 |
+
# ββ Pose FP filter + box refinement ββββββββββββββββββββββββββββββββββ
|
| 1351 |
+
|
| 1352 |
+
def _pose_run(self, image_bgr):
|
| 1353 |
+
"""Run pose model on full image, return (boxes [N,4], confs [N], keypoints [N,17,3]) in original coords."""
|
| 1354 |
+
if self.pose_session is None:
|
| 1355 |
+
return np.empty((0, 4)), np.empty(0), np.empty((0, 17, 3))
|
| 1356 |
+
|
| 1357 |
+
oh, ow = image_bgr.shape[:2]
|
| 1358 |
+
|
| 1359 |
+
# Letterbox to pose model input size
|
| 1360 |
+
r = min(self.pose_h / oh, self.pose_w / ow)
|
| 1361 |
+
nw, nh = int(round(ow * r)), int(round(oh * r))
|
| 1362 |
+
img_r = cv2.resize(image_bgr, (nw, nh), interpolation=cv2.INTER_LINEAR)
|
| 1363 |
+
dw, dh = self.pose_w - nw, self.pose_h - nh
|
| 1364 |
+
pl, pt = dw // 2, dh // 2
|
| 1365 |
+
img_p = cv2.copyMakeBorder(
|
| 1366 |
+
img_r, pt, dh - pt, pl, dw - pl,
|
| 1367 |
+
cv2.BORDER_CONSTANT, value=(114, 114, 114),
|
| 1368 |
+
)
|
| 1369 |
+
|
| 1370 |
+
rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
|
| 1371 |
+
inp = rgb.astype(np.float32) / 255.0
|
| 1372 |
+
inp = np.ascontiguousarray(inp.transpose(2, 0, 1)[np.newaxis])
|
| 1373 |
+
|
| 1374 |
+
raw = self.pose_session.run(None, {self.pose_input_name: inp})[0]
|
| 1375 |
+
|
| 1376 |
+
# raw shape: [1, 56, 8400] -> transpose to [8400, 56]
|
| 1377 |
+
pred = raw[0] if raw.ndim == 3 else raw
|
| 1378 |
+
if pred.shape[0] < pred.shape[1]:
|
| 1379 |
+
pred = pred.T
|
| 1380 |
+
|
| 1381 |
+
# Decode: cols 0-3=xywh, col 4=conf, cols 5-55=17*3 keypoints
|
| 1382 |
+
confs = pred[:, 4]
|
| 1383 |
+
keep = confs >= POSE_CONF_THRESH
|
| 1384 |
+
if not keep.any():
|
| 1385 |
+
return np.empty((0, 4)), np.empty(0), np.empty((0, 17, 3))
|
| 1386 |
+
|
| 1387 |
+
pred = pred[keep]
|
| 1388 |
+
confs = pred[:, 4]
|
| 1389 |
+
|
| 1390 |
+
# Convert xywh to x1y1x2y2 in original coords
|
| 1391 |
+
cx, cy, bw, bh = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
|
| 1392 |
+
x1 = np.clip((cx - bw / 2 - pl) / r, 0, ow)
|
| 1393 |
+
y1 = np.clip((cy - bh / 2 - pt) / r, 0, oh)
|
| 1394 |
+
x2 = np.clip((cx + bw / 2 - pl) / r, 0, ow)
|
| 1395 |
+
y2 = np.clip((cy + bh / 2 - pt) / r, 0, oh)
|
| 1396 |
+
boxes = np.stack([x1, y1, x2, y2], axis=1)
|
| 1397 |
+
|
| 1398 |
+
# Decode keypoints: [N, 51] -> [N, 17, 3]
|
| 1399 |
+
kp_raw = pred[:, 5:].reshape(-1, 17, 3).copy()
|
| 1400 |
+
kp_raw[:, :, 0] = (kp_raw[:, :, 0] - pl) / r # x
|
| 1401 |
+
kp_raw[:, :, 1] = (kp_raw[:, :, 1] - pt) / r # y
|
| 1402 |
+
kp_raw[:, :, 0] = np.clip(kp_raw[:, :, 0], 0, ow)
|
| 1403 |
+
kp_raw[:, :, 1] = np.clip(kp_raw[:, :, 1], 0, oh)
|
| 1404 |
+
|
| 1405 |
+
# NMS on pose detections
|
| 1406 |
+
order = np.argsort(-confs)
|
| 1407 |
+
boxes = boxes[order]
|
| 1408 |
+
confs = confs[order]
|
| 1409 |
+
kp_raw = kp_raw[order]
|
| 1410 |
+
|
| 1411 |
+
keep_idx = []
|
| 1412 |
+
suppressed = set()
|
| 1413 |
+
for i in range(len(boxes)):
|
| 1414 |
+
if i in suppressed:
|
| 1415 |
+
continue
|
| 1416 |
+
keep_idx.append(i)
|
| 1417 |
+
for j in range(i + 1, len(boxes)):
|
| 1418 |
+
if j in suppressed:
|
| 1419 |
+
continue
|
| 1420 |
+
xx1 = max(boxes[i, 0], boxes[j, 0])
|
| 1421 |
+
yy1 = max(boxes[i, 1], boxes[j, 1])
|
| 1422 |
+
xx2 = min(boxes[i, 2], boxes[j, 2])
|
| 1423 |
+
yy2 = min(boxes[i, 3], boxes[j, 3])
|
| 1424 |
+
inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
|
| 1425 |
+
a1 = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
|
| 1426 |
+
a2 = (boxes[j, 2] - boxes[j, 0]) * (boxes[j, 3] - boxes[j, 1])
|
| 1427 |
+
iou_val = inter / (a1 + a2 - inter + 1e-9)
|
| 1428 |
+
if iou_val >= POSE_NMS_IOU:
|
| 1429 |
+
suppressed.add(j)
|
| 1430 |
+
|
| 1431 |
+
if not keep_idx:
|
| 1432 |
+
return np.empty((0, 4)), np.empty(0), np.empty((0, 17, 3))
|
| 1433 |
+
keep_idx = np.array(keep_idx)
|
| 1434 |
+
return boxes[keep_idx], confs[keep_idx], kp_raw[keep_idx]
|
| 1435 |
+
|
| 1436 |
+
_FACE_SIZE = 640
|
| 1437 |
+
_FACE_STRIDES = (8, 16, 32)
|
| 1438 |
+
_FACE_NUM_ANCHORS = 2
|
| 1439 |
+
_FACE_THRESH = 0.5
|
| 1440 |
+
_FACE_NMS_THRESH = 0.4
|
| 1441 |
+
|
| 1442 |
+
def _face_run(self, image_bgr):
|
| 1443 |
+
"""Run SCRFD-500M face detector. Returns (face_boxes [N,4], face_confs [N])."""
|
| 1444 |
+
if self.face_session is None:
|
| 1445 |
+
return np.empty((0, 4)), np.empty(0)
|
| 1446 |
+
|
| 1447 |
+
oh, ow = image_bgr.shape[:2]
|
| 1448 |
+
sz = self._FACE_SIZE
|
| 1449 |
+
|
| 1450 |
+
# Letterbox resize preserving aspect ratio (top-left aligned)
|
| 1451 |
+
scale = min(sz / oh, sz / ow)
|
| 1452 |
+
nw, nh = int(round(ow * scale)), int(round(oh * scale))
|
| 1453 |
+
resized = cv2.resize(image_bgr, (nw, nh), interpolation=cv2.INTER_LINEAR)
|
| 1454 |
+
det_img = np.zeros((sz, sz, 3), dtype=np.uint8)
|
| 1455 |
+
det_img[:nh, :nw, :] = resized
|
| 1456 |
+
|
| 1457 |
+
# Preprocess: BGRβRGB, (pixel - 127.5) / 128.0
|
| 1458 |
+
blob = cv2.dnn.blobFromImage(
|
| 1459 |
+
det_img, 1.0 / 128.0, (sz, sz), (127.5, 127.5, 127.5), swapRB=True,
|
| 1460 |
+
)
|
| 1461 |
+
|
| 1462 |
+
outputs = self.face_session.run(None, {self.face_input_name: blob})
|
| 1463 |
+
|
| 1464 |
+
# Decode 3 stride levels: outputs[0:3]=scores, [3:6]=bboxes, [6:9]=kps
|
| 1465 |
+
all_scores, all_boxes = [], []
|
| 1466 |
+
for idx, stride in enumerate(self._FACE_STRIDES):
|
| 1467 |
+
scores = outputs[idx][:, 0] # (N,)
|
| 1468 |
+
bbox_d = outputs[idx + 3] # (N, 4) distances
|
| 1469 |
+
keep = scores >= self._FACE_THRESH
|
| 1470 |
+
if not keep.any():
|
| 1471 |
+
continue
|
| 1472 |
+
scores = scores[keep]
|
| 1473 |
+
bbox_d = bbox_d[keep]
|
| 1474 |
+
|
| 1475 |
+
# Generate anchor centers for kept positions
|
| 1476 |
+
fh, fw = sz // stride, sz // stride
|
| 1477 |
+
grid_y, grid_x = np.mgrid[:fh, :fw]
|
| 1478 |
+
centers = np.stack([grid_x, grid_y], axis=-1).astype(np.float32).reshape(-1, 2)
|
| 1479 |
+
centers = np.tile(centers, (1, self._FACE_NUM_ANCHORS)).reshape(-1, 2) * stride
|
| 1480 |
+
centers = centers[keep]
|
| 1481 |
+
|
| 1482 |
+
# distance β bbox: [x1, y1, x2, y2]
|
| 1483 |
+
x1 = centers[:, 0] - bbox_d[:, 0] * stride
|
| 1484 |
+
y1 = centers[:, 1] - bbox_d[:, 1] * stride
|
| 1485 |
+
x2 = centers[:, 0] + bbox_d[:, 2] * stride
|
| 1486 |
+
y2 = centers[:, 1] + bbox_d[:, 3] * stride
|
| 1487 |
+
boxes = np.stack([x1, y1, x2, y2], axis=-1) / scale
|
| 1488 |
+
|
| 1489 |
+
all_scores.append(scores)
|
| 1490 |
+
all_boxes.append(boxes)
|
| 1491 |
+
|
| 1492 |
+
if not all_scores:
|
| 1493 |
+
return np.empty((0, 4)), np.empty(0)
|
| 1494 |
+
|
| 1495 |
+
scores = np.concatenate(all_scores)
|
| 1496 |
+
boxes = np.concatenate(all_boxes)
|
| 1497 |
+
|
| 1498 |
+
# NMS
|
| 1499 |
+
order = scores.argsort()[::-1]
|
| 1500 |
+
scores, boxes = scores[order], boxes[order]
|
| 1501 |
+
keep = []
|
| 1502 |
+
x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
|
| 1503 |
+
areas = (x2 - x1) * (y2 - y1)
|
| 1504 |
+
suppressed = np.zeros(len(scores), dtype=bool)
|
| 1505 |
+
for i in range(len(scores)):
|
| 1506 |
+
if suppressed[i]:
|
| 1507 |
+
continue
|
| 1508 |
+
keep.append(i)
|
| 1509 |
+
xx1 = np.maximum(x1[i], x1[i + 1:])
|
| 1510 |
+
yy1 = np.maximum(y1[i], y1[i + 1:])
|
| 1511 |
+
xx2 = np.minimum(x2[i], x2[i + 1:])
|
| 1512 |
+
yy2 = np.minimum(y2[i], y2[i + 1:])
|
| 1513 |
+
inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
|
| 1514 |
+
ovr = inter / (areas[i] + areas[i + 1:] - inter + 1e-6)
|
| 1515 |
+
suppressed[i + 1:] |= ovr > self._FACE_NMS_THRESH
|
| 1516 |
+
|
| 1517 |
+
return boxes[keep], scores[keep]
|
| 1518 |
+
|
| 1519 |
+
@staticmethod
def _anatomical_score(kps, kp_conf_thresh=POSE_KP_CONF):
    """Weighted visibility score over 17 COCO keypoints.

    Args:
        kps: [17, 3] array of (x, y, visibility-confidence).
        kp_conf_thresh: minimum confidence for a keypoint to count.

    Returns:
        (score, has_head, n_visible): score is the POSE_KP_WEIGHTS-weighted
        sum of visible keypoints (0.0-1.0); has_head flags any visible head
        keypoint (nose/eyes/ears, indices in POSE_HEAD_KP); n_visible is
        the visible-keypoint count.
    """
    vis_mask = kps[:, 2] >= kp_conf_thresh
    count = int(vis_mask.sum())
    weighted = float((vis_mask.astype(np.float32) * POSE_KP_WEIGHTS).sum())
    head_seen = bool(vis_mask[POSE_HEAD_KP].any())
    return weighted, head_seen, count
def _refine_box_with_keypoints(self, pb, kps, ow, oh):
    """Blend a person box toward the padded tight bbox of visible keypoints.

    POSE_REFINE_BLEND controls the mix (0 = keep detector box, 1 = pure
    keypoint box); coordinates are clipped to the image and returned as a
    new BoundingBox with cls_id 0 and the original confidence.  The box is
    returned unchanged when no keypoint clears POSE_KP_CONF.
    """
    vis = kps[:, 2] >= POSE_KP_CONF
    if not vis.any():
        return pb
    pts = kps[vis]
    kx1 = float(pts[:, 0].min())
    ky1 = float(pts[:, 1].min())
    kx2 = float(pts[:, 0].max())
    ky2 = float(pts[:, 1].max())

    # Pad the keypoint bbox proportionally to its own size.
    px = (kx2 - kx1) * POSE_KP_PAD
    py = (ky2 - ky1) * POSE_KP_PAD
    kx1 = max(0, kx1 - px)
    ky1 = max(0, ky1 - py)
    kx2 = min(ow, kx2 + px)
    ky2 = min(oh, ky2 + py)

    w = POSE_REFINE_BLEND

    def mix(orig_v, kp_v, hi):
        # Linear blend, truncated to int and clamped into the image.
        return max(0, min(hi, int(orig_v * (1 - w) + kp_v * w)))

    return BoundingBox(
        x1=mix(pb.x1, kx1, ow),
        y1=mix(pb.y1, ky1, oh),
        x2=mix(pb.x2, kx2, ow),
        y2=mix(pb.y2, ky2, oh),
        cls_id=0,
        conf=pb.conf,
    )
+
def _pose_filter_refine(self, person_boxes, image_bgr):
|
| 1566 |
+
"""Filter FP detections and refine boxes using anatomical keypoint scoring.
|
| 1567 |
+
|
| 1568 |
+
Anatomical scoring: weighted sum of visible keypoints where head/face
|
| 1569 |
+
keypoints (nose, eyes, ears) contribute most, upper body (shoulders,
|
| 1570 |
+
elbows, wrists) next, lower body (hips, knees, ankles) least.
|
| 1571 |
+
|
| 1572 |
+
Decision logic:
|
| 1573 |
+
1. Run pose model once on full image.
|
| 1574 |
+
2. Run face detector (if available) for additional confirmation.
|
| 1575 |
+
3. Match each person detection to best-overlapping pose detection.
|
| 1576 |
+
4. For matched boxes:
|
| 1577 |
+
a. Head keypoints visible OR face detected β KEEP + refine (never suppress)
|
| 1578 |
+
b. Anatomical score >= REFINE threshold β KEEP + refine
|
| 1579 |
+
c. Anatomical score > 0 β KEEP as-is (partially visible person)
|
| 1580 |
+
d. Anatomical score == 0 + large + low-conf β SUPPRESS (FP candidate)
|
| 1581 |
+
5. For unmatched boxes:
|
| 1582 |
+
a. Face detected inside box β KEEP
|
| 1583 |
+
b. Large + low-conf β SUPPRESS
|
| 1584 |
+
c. Small or high-conf β KEEP (SAHI-detected or confident)
|
| 1585 |
+
"""
|
| 1586 |
+
if not person_boxes or self.pose_session is None:
|
| 1587 |
+
return person_boxes
|
| 1588 |
+
|
| 1589 |
+
oh, ow = image_bgr.shape[:2]
|
| 1590 |
+
img_area = float(oh * ow)
|
| 1591 |
+
|
| 1592 |
+
# Run pose model
|
| 1593 |
+
t_pose = time.monotonic()
|
| 1594 |
+
pose_boxes, pose_confs, pose_kps = self._pose_run(image_bgr)
|
| 1595 |
+
dt_pose = (time.monotonic() - t_pose) * 1000
|
| 1596 |
+
|
| 1597 |
+
# Cache pose data for motorcycle rider check in vehicle parts confirmation
|
| 1598 |
+
self._cached_pose_data = (pose_boxes, pose_kps)
|
| 1599 |
+
|
| 1600 |
+
# Run face detector if available
|
| 1601 |
+
face_boxes = np.empty((0, 4))
|
| 1602 |
+
if self.face_session is not None:
|
| 1603 |
+
t_face = time.monotonic()
|
| 1604 |
+
face_boxes, _ = self._face_run(image_bgr)
|
| 1605 |
+
dt_face = (time.monotonic() - t_face) * 1000
|
| 1606 |
+
logger.info(f"[pose] {len(pose_boxes)} pose, {len(face_boxes)} faces "
|
| 1607 |
+
f"in {dt_pose:.0f}+{dt_face:.0f}ms")
|
| 1608 |
+
else:
|
| 1609 |
+
logger.info(f"[pose] {len(pose_boxes)} pose detections in {dt_pose:.0f}ms")
|
| 1610 |
+
|
| 1611 |
+
# Helper: check if any face detection is inside a person box
|
| 1612 |
+
def has_face_inside(pb, boxes=None):
    """Return True if any detected face center lies inside person box *pb*.

    A face "confirms" a person detection when the face box's center point
    falls within the person box — center containment is used instead of
    IoU because faces are much smaller than person boxes.

    Args:
        pb: Person detection with ``x1, y1, x2, y2`` attributes.
        boxes: Optional iterable of ``[x1, y1, x2, y2]`` face boxes.
            Defaults to the enclosing scope's ``face_boxes``, preserving
            the original closure-based call sites (``has_face_inside(pb)``).

    Returns:
        bool: True if at least one face center is inside ``pb``.
    """
    if boxes is None:
        # Backward-compatible fallback to the closure variable.
        boxes = face_boxes
    if len(boxes) == 0:
        return False
    for fb in boxes:
        # Face center must be inside person box
        fcx = (fb[0] + fb[2]) / 2
        fcy = (fb[1] + fb[3]) / 2
        if pb.x1 <= fcx <= pb.x2 and pb.y1 <= fcy <= pb.y2:
            return True
    return False
|
| 1622 |
+
|
| 1623 |
+
if len(pose_boxes) == 0:
|
| 1624 |
+
# No pose detections β use face detector or size/conf heuristic
|
| 1625 |
+
result = []
|
| 1626 |
+
n_suppressed = 0
|
| 1627 |
+
for pb in person_boxes:
|
| 1628 |
+
if has_face_inside(pb):
|
| 1629 |
+
result.append(pb)
|
| 1630 |
+
continue
|
| 1631 |
+
bw = pb.x2 - pb.x1
|
| 1632 |
+
bh = pb.y2 - pb.y1
|
| 1633 |
+
area_ratio = (bw * bh) / img_area
|
| 1634 |
+
if area_ratio > POSE_FP_MIN_AREA and pb.conf < POSE_FP_MAX_CONF:
|
| 1635 |
+
n_suppressed += 1
|
| 1636 |
+
continue
|
| 1637 |
+
result.append(pb)
|
| 1638 |
+
if n_suppressed:
|
| 1639 |
+
logger.info(f"[pose] Suppressed {n_suppressed} FP (no pose detections)")
|
| 1640 |
+
return result
|
| 1641 |
+
|
| 1642 |
+
# Match person detections to pose detections via IoU
|
| 1643 |
+
result = []
|
| 1644 |
+
n_refined = 0
|
| 1645 |
+
n_suppressed = 0
|
| 1646 |
+
n_face_saved = 0
|
| 1647 |
+
|
| 1648 |
+
for pb in person_boxes:
|
| 1649 |
+
pb_arr = np.array([pb.x1, pb.y1, pb.x2, pb.y2], dtype=float)
|
| 1650 |
+
best_iou = 0.0
|
| 1651 |
+
best_idx = -1
|
| 1652 |
+
|
| 1653 |
+
for j in range(len(pose_boxes)):
|
| 1654 |
+
xx1 = max(pb_arr[0], pose_boxes[j, 0])
|
| 1655 |
+
yy1 = max(pb_arr[1], pose_boxes[j, 1])
|
| 1656 |
+
xx2 = min(pb_arr[2], pose_boxes[j, 2])
|
| 1657 |
+
yy2 = min(pb_arr[3], pose_boxes[j, 3])
|
| 1658 |
+
inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
|
| 1659 |
+
a1 = (pb_arr[2] - pb_arr[0]) * (pb_arr[3] - pb_arr[1])
|
| 1660 |
+
a2 = (pose_boxes[j, 2] - pose_boxes[j, 0]) * (pose_boxes[j, 3] - pose_boxes[j, 1])
|
| 1661 |
+
iou_val = inter / (a1 + a2 - inter + 1e-9)
|
| 1662 |
+
if iou_val > best_iou:
|
| 1663 |
+
best_iou = iou_val
|
| 1664 |
+
best_idx = j
|
| 1665 |
+
|
| 1666 |
+
if best_iou >= POSE_MATCH_IOU and best_idx >= 0:
|
| 1667 |
+
# Matched to a pose detection β compute anatomical score
|
| 1668 |
+
kps = pose_kps[best_idx] # [17, 3]
|
| 1669 |
+
anat_score, has_head, n_vis = self._anatomical_score(kps)
|
| 1670 |
+
|
| 1671 |
+
if has_head or has_face_inside(pb):
|
| 1672 |
+
# Head/face visible β definitely a person, refine box
|
| 1673 |
+
result.append(self._refine_box_with_keypoints(pb, kps, ow, oh))
|
| 1674 |
+
n_refined += 1
|
| 1675 |
+
elif anat_score >= POSE_ANAT_REFINE_THRESH:
|
| 1676 |
+
# Good anatomical score β person confirmed, refine
|
| 1677 |
+
result.append(self._refine_box_with_keypoints(pb, kps, ow, oh))
|
| 1678 |
+
n_refined += 1
|
| 1679 |
+
elif anat_score > POSE_ANAT_SUPPRESS_THRESH:
|
| 1680 |
+
# Some keypoints visible but low score β keep as-is
|
| 1681 |
+
result.append(pb)
|
| 1682 |
+
else:
|
| 1683 |
+
# Matched to pose bbox but ZERO keypoints visible
|
| 1684 |
+
# Only suppress if also large and low confidence
|
| 1685 |
+
bw = pb.x2 - pb.x1
|
| 1686 |
+
bh = pb.y2 - pb.y1
|
| 1687 |
+
area_ratio = (bw * bh) / img_area
|
| 1688 |
+
if area_ratio > POSE_FP_MIN_AREA and pb.conf < POSE_FP_MAX_CONF:
|
| 1689 |
+
n_suppressed += 1
|
| 1690 |
+
continue
|
| 1691 |
+
result.append(pb)
|
| 1692 |
+
else:
|
| 1693 |
+
# Not matched to any pose detection
|
| 1694 |
+
if has_face_inside(pb):
|
| 1695 |
+
# Face detector confirms a person
|
| 1696 |
+
result.append(pb)
|
| 1697 |
+
n_face_saved += 1
|
| 1698 |
+
continue
|
| 1699 |
+
|
| 1700 |
+
bw = pb.x2 - pb.x1
|
| 1701 |
+
bh = pb.y2 - pb.y1
|
| 1702 |
+
area_ratio = (bw * bh) / img_area
|
| 1703 |
+
|
| 1704 |
+
if area_ratio > POSE_FP_MIN_AREA and pb.conf < POSE_FP_MAX_CONF:
|
| 1705 |
+
# Large unmatched low-conf box β likely FP
|
| 1706 |
+
n_suppressed += 1
|
| 1707 |
+
continue
|
| 1708 |
+
else:
|
| 1709 |
+
# Small box or high conf β keep
|
| 1710 |
+
result.append(pb)
|
| 1711 |
+
|
| 1712 |
+
if n_refined or n_suppressed or n_face_saved:
|
| 1713 |
+
logger.info(f"[pose] Refined {n_refined}, suppressed {n_suppressed} FP, "
|
| 1714 |
+
f"face-saved {n_face_saved}, "
|
| 1715 |
+
f"kept {len(result)}/{len(person_boxes)}")
|
| 1716 |
+
return result
|
| 1717 |
+
|
| 1718 |
+
# ── Person inference with SAHI tiling ────────────────────────────────
|
| 1719 |
+
|
| 1720 |
def _infer_person(self, image_bgr):
|
| 1721 |
+
"""Person detection with SAHI-inspired tiled inference + dynamic NMS.
|
| 1722 |
|
| 1723 |
Pipeline:
|
| 1724 |
+
1. Full-image pass at native 960px
|
| 1725 |
2. 2 tiled passes (higher effective resolution for small/distant people)
|
| 1726 |
3. Flip TTA pass if time budget allows
|
| 1727 |
+
4. Dynamic NMS merge (adapts IoU threshold to scene density)
|
| 1728 |
5. Sanity filters
|
| 1729 |
+
6. Pose FP filter + box refinement
|
| 1730 |
"""
|
| 1731 |
oh, ow = image_bgr.shape[:2]
|
| 1732 |
t_start = time.monotonic()
|
|
|
|
| 1735 |
all_boxes = [] # list of [N, 4] arrays
|
| 1736 |
all_confs = [] # list of [N] arrays
|
| 1737 |
|
| 1738 |
+
# Pass 1: full image at native 960px
|
| 1739 |
boxes_full, confs_full = self._per_run_pass(image_bgr, PER_CONF_LOW)
|
| 1740 |
if len(boxes_full) > 0:
|
| 1741 |
all_boxes.append(boxes_full)
|
|
|
|
| 1768 |
if not all_boxes:
|
| 1769 |
return []
|
| 1770 |
|
| 1771 |
+
# Dynamic NMS: adapt IoU threshold to scene density
|
| 1772 |
merged_b = np.concatenate(all_boxes)
|
| 1773 |
merged_s = np.concatenate(all_confs)
|
| 1774 |
+
n_raw = len(merged_s)
|
| 1775 |
+
nms_iou = 0.60 if n_raw > 30 else (0.40 if n_raw < 10 else PER_NMS_IOU)
|
| 1776 |
+
merged_b, merged_s = self._nms_max_conf(merged_b, merged_s, nms_iou)
|
| 1777 |
|
| 1778 |
if len(merged_b) == 0:
|
| 1779 |
return []
|
|
|
|
| 1802 |
cls_id=0,
|
| 1803 |
conf=max(0.0, min(1.0, float(merged_s[i]))),
|
| 1804 |
))
|
| 1805 |
+
|
| 1806 |
+
# Pose FP filter + box refinement (only if time budget allows)
|
| 1807 |
+
if time.monotonic() - t_start < PER_RTF_BUDGET * 0.85:
|
| 1808 |
+
out = self._pose_filter_refine(out, image_bgr)
|
| 1809 |
+
|
| 1810 |
return out
|
| 1811 |
|
| 1812 |
# ── Unified inference ───────────────────────────────────────────────────
|
| 1813 |
|
| 1814 |
def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
|
| 1815 |
+
self._cached_pose_data = None # reset before each frame
|
| 1816 |
if ENABLE_PARALLEL:
|
| 1817 |
veh_future = self._executor.submit(self._infer_vehicle, image_bgr)
|
| 1818 |
per_future = self._executor.submit(self._infer_person, image_bgr)
|
|
|
|
| 1821 |
else:
|
| 1822 |
vehicle_boxes = self._infer_vehicle(image_bgr)
|
| 1823 |
person_boxes = self._infer_person(image_bgr)
|
| 1824 |
+
|
| 1825 |
+
# Vehicle parts confirmation: cross-reference with person detections
|
| 1826 |
+
vehicle_boxes = self._vehicle_parts_confirm(
|
| 1827 |
+
vehicle_boxes, person_boxes, image_bgr)
|
| 1828 |
+
|
| 1829 |
return vehicle_boxes + person_boxes
|
| 1830 |
|
| 1831 |
# -- Replay buffer -------------------------------------------------------
|
|
|
|
| 1903 |
).start()
|
| 1904 |
|
| 1905 |
return results
|
| 1906 |
+
# Miner v3.15 — background TRT engine build + CUDA-first fallback 20260402
|
plate_weights.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3bd9b9f88dd75dec77f974e2f3a81f6bbe689e0e5e587b32cc4c8881dad8034
|
| 3 |
+
size 1930779
|
pose_weights.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be33b5f7c7f04052cff87ac9f3c7a56e6d2055c9524dae3a2dc9229be488afaa
|
| 3 |
+
size 6800452
|
vehicle_weights.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00bae032cee689d04b3b9131cd80134d03c17972490190f45a5f2aa96f9b703a
|
| 3 |
+
size 21244589
|