meaculpitt committed on
Commit
4108697
·
verified ·
1 Parent(s): b6745f3

revert to v3.29: undo v3.30 vehicle TRT changes

Browse files
Files changed (1) hide show
  1. sv_gpu.py +28 -43
sv_gpu.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- Score Vision SN44 — Unified miner v3.30 (2026-04-08). R9c vehicle FP16 (mAP50=0.929) + TRT background build. Person: TTA consensus + 15% box shrink + NMS 0.35. Vehicle TTA disabled for RTF compliance.
3
  Dual-model: vehicle (YOLO11m INT8 1280) + person (YOLO12s FP16 960 TRT).
4
  Pose model: YOLOv8n-pose FP16 640 for false-positive filtering + keypoint box refinement.
5
  Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
@@ -368,8 +368,7 @@ TRT_WORKSPACE_GB = 4
368
  WBF_SKIP_THR = 0.0001
369
 
370
  # ── Speed config ────────────────────────────────────────────────────────────
371
- ENABLE_TTA = True # Person TTA (flip consensus merge)
372
- VEHICLE_TTA = False # Vehicle flip TTA disabled — saves ~200-400ms, RTF compliance
373
  ENABLE_PARALLEL = True
374
 
375
  # ── Secondary HF repo for vehicle weights ───────────────────────────────────
@@ -609,22 +608,13 @@ class Miner:
609
  self._trt_ready = False
610
  logger.info("[init] Person model: CUDA (TRT build starting in background)")
611
 
612
- # Launch background TRT engine builds
613
  os.makedirs(TRT_CACHE_PATH, exist_ok=True)
614
  threading.Thread(
615
  target=self._build_trt_engine,
616
- args=(per_onnx, "person"),
617
  daemon=True,
618
- name="trt-builder-person",
619
- ).start()
620
-
621
- # Vehicle TRT build — same pattern, separate thread
622
- self._veh_trt_ready = False
623
- threading.Thread(
624
- target=self._build_trt_engine,
625
- args=(veh_weights, "vehicle"),
626
- daemon=True,
627
- name="trt-builder-vehicle",
628
  ).start()
629
 
630
  # Pose model — for FP filtering + box refinement
@@ -686,12 +676,12 @@ class Miner:
686
  logger.info(f"Person ORT providers: {per_prov} (TRT building in background)")
687
  logger.info(f"TTA={ENABLE_TTA} PARALLEL={ENABLE_PARALLEL}")
688
 
689
- def _build_trt_engine(self, onnx_path, model_name="person"):
690
- """Build TRT FP16 engine in background, swap session when ready.
691
 
692
  On fresh nodes: ~18 min to compile. Cached engine loads in <1s.
693
- During build, inference uses CUDAExecutionProvider.
694
- After build, atomically swaps to TRT session.
695
  """
696
  try:
697
  trt_opts = {
@@ -701,9 +691,9 @@ class Miner:
701
  "trt_engine_cache_path": TRT_CACHE_PATH,
702
  }
703
  t0 = time.monotonic()
704
- logger.info("[trt-build-%s] Creating TRT session (may take ~18min on fresh node)...", model_name)
705
  trt_session = ort.InferenceSession(
706
- onnx_path,
707
  providers=[
708
  ("TensorrtExecutionProvider", trt_opts),
709
  "CUDAExecutionProvider",
@@ -713,7 +703,7 @@ class Miner:
713
 
714
  provs = trt_session.get_providers()
715
  if "TensorrtExecutionProvider" not in provs:
716
- logger.warning("[trt-build-%s] TRT provider not active (%s), keeping CUDA", model_name, provs)
717
  return
718
 
719
  # Run dummy inference to fully materialize the engine
@@ -723,24 +713,21 @@ class Miner:
723
  trt_session.run(None, {inp_name: dummy})
724
 
725
  dt = time.monotonic() - t0
726
- logger.info("[trt-build-%s] TRT engine ready in %.1fs β€” swapping session", model_name, dt)
727
 
728
  # Atomic swap — Python GIL makes single attribute assignment safe.
729
- if model_name == "person":
730
- self.per_session = trt_session
731
- self._trt_ready = True
732
- logger.info("[trt-build-person] Person model now using TensorRT FP16")
733
- elif model_name == "vehicle":
734
- self.veh_session = trt_session
735
- self._veh_trt_ready = True
736
- logger.info("[trt-build-vehicle] Vehicle model now using TensorRT FP16")
737
  except Exception as e:
738
- logger.warning("[trt-build-%s] TRT build failed (%s), keeping CUDA", model_name, e)
739
 
740
  def __repr__(self) -> str:
741
- per_trt = "TRT" if self._trt_ready else "CUDA (TRT building)"
742
- veh_trt = "TRT" if self._veh_trt_ready else "CUDA (TRT building)"
743
- return f"Unified Miner v3.30 β€” person={per_trt}, vehicle={veh_trt}"
744
 
745
  # ── Vehicle preprocessing (letterbox) ───────────────────────────────────
746
 
@@ -798,7 +785,7 @@ class Miner:
798
  boxes, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_CONF_THRES, session)
799
 
800
  # Flip TTA pass — horizontal flip, mirror boxes back
801
- if VEHICLE_TTA:
802
  flipped = cv2.flip(image_bgr, 1)
803
  f_boxes, f_confs, f_cls = self._veh_run_pass(flipped, VEH_TTA_CONF, session)
804
  if len(f_boxes) > 0:
@@ -1174,7 +1161,7 @@ class Miner:
1174
  confs = pred[:, 4] * np.max(pred[:, 5:], axis=1)
1175
  return bool((confs >= VEH_PARTS_PLATE_CONF).any())
1176
 
1177
- def _vehicle_parts_confirm(self, vehicle_boxes, person_boxes, image_bgr, skip_plate=False):
1178
  """Parts-based confidence scoring for vehicle detections.
1179
 
1180
  Scoring hierarchy (confidence boosts are additive):
@@ -1195,9 +1182,7 @@ class Miner:
1195
  img_area = float(oh * ow)
1196
  has_plate_model = self.plate_session is not None
1197
  # Skip plate checks on crowded scenes (aerial/drone, plates invisible)
1198
- # Also skip when called from vehicle-only hint (saves ONNX inference latency)
1199
- if not skip_plate:
1200
- skip_plate = len(vehicle_boxes) > 20
1201
 
1202
  result = []
1203
  n_driver = 0
@@ -2098,10 +2083,10 @@ class Miner:
2098
 
2099
  if element_hint == 'vehicle':
2100
  # Run vehicle detection + parts confirmation with empty person_boxes.
2101
- # Skip plate ONNX to save latency — no person boxes means driver/rider
2102
- # checks are no-ops anyway.
2103
  vehicle_boxes = self._infer_vehicle(image_bgr)
2104
- return self._vehicle_parts_confirm(vehicle_boxes, [], image_bgr, skip_plate=True)
2105
 
2106
  # Fallback: run both (original behavior)
2107
  if ENABLE_PARALLEL:
 
1
  """
2
+ Score Vision SN44 — Unified miner v3.29 (2026-04-08). R9c vehicle FP16 (mAP50=0.929). Person: TTA consensus + 15% box shrink + NMS 0.35.
3
  Dual-model: vehicle (YOLO11m INT8 1280) + person (YOLO12s FP16 960 TRT).
4
  Pose model: YOLOv8n-pose FP16 640 for false-positive filtering + keypoint box refinement.
5
  Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
 
368
  WBF_SKIP_THR = 0.0001
369
 
370
  # ── Speed config ────────────────────────────────────────────────────────────
371
+ ENABLE_TTA = True
 
372
  ENABLE_PARALLEL = True
373
 
374
  # ── Secondary HF repo for vehicle weights ───────────────────────────────────
 
608
  self._trt_ready = False
609
  logger.info("[init] Person model: CUDA (TRT build starting in background)")
610
 
611
+ # Launch background TRT engine build
612
  os.makedirs(TRT_CACHE_PATH, exist_ok=True)
613
  threading.Thread(
614
  target=self._build_trt_engine,
615
+ args=(per_onnx,),
616
  daemon=True,
617
+ name="trt-builder",
 
 
 
 
 
 
 
 
 
618
  ).start()
619
 
620
  # Pose model — for FP filtering + box refinement
 
676
  logger.info(f"Person ORT providers: {per_prov} (TRT building in background)")
677
  logger.info(f"TTA={ENABLE_TTA} PARALLEL={ENABLE_PARALLEL}")
678
 
679
+ def _build_trt_engine(self, per_onnx):
680
+ """Build TRT FP16 engine in background, swap person session when ready.
681
 
682
  On fresh nodes: ~18 min to compile. Cached engine loads in <1s.
683
+ During build, inference uses CUDAExecutionProvider (passes RTF at ~78ms).
684
+ After build, atomically swaps to TRT session (~29ms pipeline).
685
  """
686
  try:
687
  trt_opts = {
 
691
  "trt_engine_cache_path": TRT_CACHE_PATH,
692
  }
693
  t0 = time.monotonic()
694
+ logger.info("[trt-build] Creating TRT session (may take ~18min on fresh node)...")
695
  trt_session = ort.InferenceSession(
696
+ per_onnx,
697
  providers=[
698
  ("TensorrtExecutionProvider", trt_opts),
699
  "CUDAExecutionProvider",
 
703
 
704
  provs = trt_session.get_providers()
705
  if "TensorrtExecutionProvider" not in provs:
706
+ logger.warning("[trt-build] TRT provider not active (%s), keeping CUDA", provs)
707
  return
708
 
709
  # Run dummy inference to fully materialize the engine
 
713
  trt_session.run(None, {inp_name: dummy})
714
 
715
  dt = time.monotonic() - t0
716
+ logger.info("[trt-build] TRT engine ready in %.1fs β€” swapping person session", dt)
717
 
718
  # Atomic swap — Python GIL makes single attribute assignment safe.
719
+ # Any in-flight inference holds a reference to the old session, which
720
+ # stays alive until that inference completes.
721
+ self.per_session = trt_session
722
+ self._trt_ready = True
723
+
724
+ logger.info("[trt-build] Person model now using TensorRT FP16")
 
 
725
  except Exception as e:
726
+ logger.warning("[trt-build] TRT build failed (%s), keeping CUDA", e)
727
 
728
  def __repr__(self) -> str:
729
+ trt_status = "TRT" if self._trt_ready else "CUDA (TRT building)"
730
+ return f"Unified Miner v3.16 β€” person={trt_status}, background TRT engine build"
 
731
 
732
  # ── Vehicle preprocessing (letterbox) ───────────────────────────────────
733
 
 
785
  boxes, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_CONF_THRES, session)
786
 
787
  # Flip TTA pass — horizontal flip, mirror boxes back
788
+ if ENABLE_TTA:
789
  flipped = cv2.flip(image_bgr, 1)
790
  f_boxes, f_confs, f_cls = self._veh_run_pass(flipped, VEH_TTA_CONF, session)
791
  if len(f_boxes) > 0:
 
1161
  confs = pred[:, 4] * np.max(pred[:, 5:], axis=1)
1162
  return bool((confs >= VEH_PARTS_PLATE_CONF).any())
1163
 
1164
+ def _vehicle_parts_confirm(self, vehicle_boxes, person_boxes, image_bgr):
1165
  """Parts-based confidence scoring for vehicle detections.
1166
 
1167
  Scoring hierarchy (confidence boosts are additive):
 
1182
  img_area = float(oh * ow)
1183
  has_plate_model = self.plate_session is not None
1184
  # Skip plate checks on crowded scenes (aerial/drone, plates invisible)
1185
+ skip_plate = len(vehicle_boxes) > 20
 
 
1186
 
1187
  result = []
1188
  n_driver = 0
 
2083
 
2084
  if element_hint == 'vehicle':
2085
  # Run vehicle detection + parts confirmation with empty person_boxes.
2086
+ # Plate/headlight/window checks fire normally; driver/rider overlap
2087
+ # check finds no matches (boost=0) but doesn't suppress.
2088
  vehicle_boxes = self._infer_vehicle(image_bgr)
2089
+ return self._vehicle_parts_confirm(vehicle_boxes, [], image_bgr)
2090
 
2091
  # Fallback: run both (original behavior)
2092
  if ENABLE_PARALLEL: