revert to v3.29: undo v3.30 vehicle TRT changes
Browse files
sv_gpu.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
-
Score Vision SN44 — Unified miner v3.
|
| 3 |
Dual-model: vehicle (YOLO11m INT8 1280) + person (YOLO12s FP16 960 TRT).
|
| 4 |
Pose model: YOLOv8n-pose FP16 640 for false-positive filtering + keypoint box refinement.
|
| 5 |
Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
|
|
@@ -368,8 +368,7 @@ TRT_WORKSPACE_GB = 4
|
|
| 368 |
WBF_SKIP_THR = 0.0001
|
| 369 |
|
| 370 |
# ── Speed config ────────────────────────────────────────────────────────────
|
| 371 |
-
ENABLE_TTA = True
|
| 372 |
-
VEHICLE_TTA = False # Vehicle flip TTA disabled — saves ~200-400ms, RTF compliance
|
| 373 |
ENABLE_PARALLEL = True
|
| 374 |
|
| 375 |
# ── Secondary HF repo for vehicle weights ───────────────────────────────────
|
|
@@ -609,22 +608,13 @@ class Miner:
|
|
| 609 |
self._trt_ready = False
|
| 610 |
logger.info("[init] Person model: CUDA (TRT build starting in background)")
|
| 611 |
|
| 612 |
-
# Launch background TRT engine
|
| 613 |
os.makedirs(TRT_CACHE_PATH, exist_ok=True)
|
| 614 |
threading.Thread(
|
| 615 |
target=self._build_trt_engine,
|
| 616 |
-
args=(per_onnx,
|
| 617 |
daemon=True,
|
| 618 |
-
name="trt-builder
|
| 619 |
-
).start()
|
| 620 |
-
|
| 621 |
-
# Vehicle TRT build — same pattern, separate thread
|
| 622 |
-
self._veh_trt_ready = False
|
| 623 |
-
threading.Thread(
|
| 624 |
-
target=self._build_trt_engine,
|
| 625 |
-
args=(veh_weights, "vehicle"),
|
| 626 |
-
daemon=True,
|
| 627 |
-
name="trt-builder-vehicle",
|
| 628 |
).start()
|
| 629 |
|
| 630 |
# Pose model — for FP filtering + box refinement
|
|
@@ -686,12 +676,12 @@ class Miner:
|
|
| 686 |
logger.info(f"Person ORT providers: {per_prov} (TRT building in background)")
|
| 687 |
logger.info(f"TTA={ENABLE_TTA} PARALLEL={ENABLE_PARALLEL}")
|
| 688 |
|
| 689 |
-
def _build_trt_engine(self,
|
| 690 |
-
"""Build TRT FP16 engine in background, swap session when ready.
|
| 691 |
|
| 692 |
On fresh nodes: ~18 min to compile. Cached engine loads in <1s.
|
| 693 |
-
During build, inference uses CUDAExecutionProvider.
|
| 694 |
-
After build, atomically swaps to TRT session.
|
| 695 |
"""
|
| 696 |
try:
|
| 697 |
trt_opts = {
|
|
@@ -701,9 +691,9 @@ class Miner:
|
|
| 701 |
"trt_engine_cache_path": TRT_CACHE_PATH,
|
| 702 |
}
|
| 703 |
t0 = time.monotonic()
|
| 704 |
-
logger.info("[trt-build
|
| 705 |
trt_session = ort.InferenceSession(
|
| 706 |
-
|
| 707 |
providers=[
|
| 708 |
("TensorrtExecutionProvider", trt_opts),
|
| 709 |
"CUDAExecutionProvider",
|
|
@@ -713,7 +703,7 @@ class Miner:
|
|
| 713 |
|
| 714 |
provs = trt_session.get_providers()
|
| 715 |
if "TensorrtExecutionProvider" not in provs:
|
| 716 |
-
logger.warning("[trt-build
|
| 717 |
return
|
| 718 |
|
| 719 |
# Run dummy inference to fully materialize the engine
|
|
@@ -723,24 +713,21 @@ class Miner:
|
|
| 723 |
trt_session.run(None, {inp_name: dummy})
|
| 724 |
|
| 725 |
dt = time.monotonic() - t0
|
| 726 |
-
logger.info("[trt-build
|
| 727 |
|
| 728 |
# Atomic swap — Python GIL makes single attribute assignment safe.
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
self._veh_trt_ready = True
|
| 736 |
-
logger.info("[trt-build-vehicle] Vehicle model now using TensorRT FP16")
|
| 737 |
except Exception as e:
|
| 738 |
-
logger.warning("[trt-build
|
| 739 |
|
| 740 |
def __repr__(self) -> str:
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
return f"Unified Miner v3.30 β person={per_trt}, vehicle={veh_trt}"
|
| 744 |
|
| 745 |
# ── Vehicle preprocessing (letterbox) ───────────────────────────────────
|
| 746 |
|
|
@@ -798,7 +785,7 @@ class Miner:
|
|
| 798 |
boxes, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_CONF_THRES, session)
|
| 799 |
|
| 800 |
# Flip TTA pass — horizontal flip, mirror boxes back
|
| 801 |
-
if
|
| 802 |
flipped = cv2.flip(image_bgr, 1)
|
| 803 |
f_boxes, f_confs, f_cls = self._veh_run_pass(flipped, VEH_TTA_CONF, session)
|
| 804 |
if len(f_boxes) > 0:
|
|
@@ -1174,7 +1161,7 @@ class Miner:
|
|
| 1174 |
confs = pred[:, 4] * np.max(pred[:, 5:], axis=1)
|
| 1175 |
return bool((confs >= VEH_PARTS_PLATE_CONF).any())
|
| 1176 |
|
| 1177 |
-
def _vehicle_parts_confirm(self, vehicle_boxes, person_boxes, image_bgr
|
| 1178 |
"""Parts-based confidence scoring for vehicle detections.
|
| 1179 |
|
| 1180 |
Scoring hierarchy (confidence boosts are additive):
|
|
@@ -1195,9 +1182,7 @@ class Miner:
|
|
| 1195 |
img_area = float(oh * ow)
|
| 1196 |
has_plate_model = self.plate_session is not None
|
| 1197 |
# Skip plate checks on crowded scenes (aerial/drone, plates invisible)
|
| 1198 |
-
|
| 1199 |
-
if not skip_plate:
|
| 1200 |
-
skip_plate = len(vehicle_boxes) > 20
|
| 1201 |
|
| 1202 |
result = []
|
| 1203 |
n_driver = 0
|
|
@@ -2098,10 +2083,10 @@ class Miner:
|
|
| 2098 |
|
| 2099 |
if element_hint == 'vehicle':
|
| 2100 |
# Run vehicle detection + parts confirmation with empty person_boxes.
|
| 2101 |
-
#
|
| 2102 |
-
#
|
| 2103 |
vehicle_boxes = self._infer_vehicle(image_bgr)
|
| 2104 |
-
return self._vehicle_parts_confirm(vehicle_boxes, [], image_bgr
|
| 2105 |
|
| 2106 |
# Fallback: run both (original behavior)
|
| 2107 |
if ENABLE_PARALLEL:
|
|
|
|
| 1 |
"""
|
| 2 |
+
Score Vision SN44 — Unified miner v3.29 (2026-04-08). R9c vehicle FP16 (mAP50=0.929). Person: TTA consensus + 15% box shrink + NMS 0.35.
|
| 3 |
Dual-model: vehicle (YOLO11m INT8 1280) + person (YOLO12s FP16 960 TRT).
|
| 4 |
Pose model: YOLOv8n-pose FP16 640 for false-positive filtering + keypoint box refinement.
|
| 5 |
Vehicle weights loaded from secondary HF repo (meaculpitt/ScoreVision-Vehicle).
|
|
|
|
| 368 |
WBF_SKIP_THR = 0.0001
|
| 369 |
|
| 370 |
# ── Speed config ────────────────────────────────────────────────────────────
|
| 371 |
+
ENABLE_TTA = True
|
|
|
|
| 372 |
ENABLE_PARALLEL = True
|
| 373 |
|
| 374 |
# ── Secondary HF repo for vehicle weights ───────────────────────────────────
|
|
|
|
| 608 |
self._trt_ready = False
|
| 609 |
logger.info("[init] Person model: CUDA (TRT build starting in background)")
|
| 610 |
|
| 611 |
+
# Launch background TRT engine build
|
| 612 |
os.makedirs(TRT_CACHE_PATH, exist_ok=True)
|
| 613 |
threading.Thread(
|
| 614 |
target=self._build_trt_engine,
|
| 615 |
+
args=(per_onnx,),
|
| 616 |
daemon=True,
|
| 617 |
+
name="trt-builder",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 618 |
).start()
|
| 619 |
|
| 620 |
# Pose model — for FP filtering + box refinement
|
|
|
|
| 676 |
logger.info(f"Person ORT providers: {per_prov} (TRT building in background)")
|
| 677 |
logger.info(f"TTA={ENABLE_TTA} PARALLEL={ENABLE_PARALLEL}")
|
| 678 |
|
| 679 |
+
def _build_trt_engine(self, per_onnx):
|
| 680 |
+
"""Build TRT FP16 engine in background, swap person session when ready.
|
| 681 |
|
| 682 |
On fresh nodes: ~18 min to compile. Cached engine loads in <1s.
|
| 683 |
+
During build, inference uses CUDAExecutionProvider (passes RTF at ~78ms).
|
| 684 |
+
After build, atomically swaps to TRT session (~29ms pipeline).
|
| 685 |
"""
|
| 686 |
try:
|
| 687 |
trt_opts = {
|
|
|
|
| 691 |
"trt_engine_cache_path": TRT_CACHE_PATH,
|
| 692 |
}
|
| 693 |
t0 = time.monotonic()
|
| 694 |
+
logger.info("[trt-build] Creating TRT session (may take ~18min on fresh node)...")
|
| 695 |
trt_session = ort.InferenceSession(
|
| 696 |
+
per_onnx,
|
| 697 |
providers=[
|
| 698 |
("TensorrtExecutionProvider", trt_opts),
|
| 699 |
"CUDAExecutionProvider",
|
|
|
|
| 703 |
|
| 704 |
provs = trt_session.get_providers()
|
| 705 |
if "TensorrtExecutionProvider" not in provs:
|
| 706 |
+
logger.warning("[trt-build] TRT provider not active (%s), keeping CUDA", provs)
|
| 707 |
return
|
| 708 |
|
| 709 |
# Run dummy inference to fully materialize the engine
|
|
|
|
| 713 |
trt_session.run(None, {inp_name: dummy})
|
| 714 |
|
| 715 |
dt = time.monotonic() - t0
|
| 716 |
+
logger.info("[trt-build] TRT engine ready in %.1fs β swapping person session", dt)
|
| 717 |
|
| 718 |
# Atomic swap — Python GIL makes single attribute assignment safe.
|
| 719 |
+
# Any in-flight inference holds a reference to the old session, which
|
| 720 |
+
# stays alive until that inference completes.
|
| 721 |
+
self.per_session = trt_session
|
| 722 |
+
self._trt_ready = True
|
| 723 |
+
|
| 724 |
+
logger.info("[trt-build] Person model now using TensorRT FP16")
|
|
|
|
|
|
|
| 725 |
except Exception as e:
|
| 726 |
+
logger.warning("[trt-build] TRT build failed (%s), keeping CUDA", e)
|
| 727 |
|
| 728 |
def __repr__(self) -> str:
|
| 729 |
+
trt_status = "TRT" if self._trt_ready else "CUDA (TRT building)"
|
| 730 |
+
return f"Unified Miner v3.16 β person={trt_status}, background TRT engine build"
|
|
|
|
| 731 |
|
| 732 |
# ── Vehicle preprocessing (letterbox) ───────────────────────────────────
|
| 733 |
|
|
|
|
| 785 |
boxes, confs, cls_ids = self._veh_run_pass(image_bgr, VEH_CONF_THRES, session)
|
| 786 |
|
| 787 |
# Flip TTA pass — horizontal flip, mirror boxes back
|
| 788 |
+
if ENABLE_TTA:
|
| 789 |
flipped = cv2.flip(image_bgr, 1)
|
| 790 |
f_boxes, f_confs, f_cls = self._veh_run_pass(flipped, VEH_TTA_CONF, session)
|
| 791 |
if len(f_boxes) > 0:
|
|
|
|
| 1161 |
confs = pred[:, 4] * np.max(pred[:, 5:], axis=1)
|
| 1162 |
return bool((confs >= VEH_PARTS_PLATE_CONF).any())
|
| 1163 |
|
| 1164 |
+
def _vehicle_parts_confirm(self, vehicle_boxes, person_boxes, image_bgr):
|
| 1165 |
"""Parts-based confidence scoring for vehicle detections.
|
| 1166 |
|
| 1167 |
Scoring hierarchy (confidence boosts are additive):
|
|
|
|
| 1182 |
img_area = float(oh * ow)
|
| 1183 |
has_plate_model = self.plate_session is not None
|
| 1184 |
# Skip plate checks on crowded scenes (aerial/drone, plates invisible)
|
| 1185 |
+
skip_plate = len(vehicle_boxes) > 20
|
|
|
|
|
|
|
| 1186 |
|
| 1187 |
result = []
|
| 1188 |
n_driver = 0
|
|
|
|
| 2083 |
|
| 2084 |
if element_hint == 'vehicle':
|
| 2085 |
# Run vehicle detection + parts confirmation with empty person_boxes.
|
| 2086 |
+
# Plate/headlight/window checks fire normally; driver/rider overlap
|
| 2087 |
+
# check finds no matches (boost=0) but doesn't suppress.
|
| 2088 |
vehicle_boxes = self._infer_vehicle(image_bgr)
|
| 2089 |
+
return self._vehicle_parts_confirm(vehicle_boxes, [], image_bgr)
|
| 2090 |
|
| 2091 |
# Fallback: run both (original behavior)
|
| 2092 |
if ENABLE_PARALLEL:
|