scorevision: push artifact
Browse files
miner.py
CHANGED
|
@@ -161,32 +161,37 @@ class Miner:
|
|
| 161 |
self.input_h = 960
|
| 162 |
self.input_w = 960
|
| 163 |
|
| 164 |
-
# Pre-NMS confidence threshold.
|
| 165 |
-
#
|
| 166 |
-
#
|
| 167 |
-
#
|
| 168 |
-
#
|
| 169 |
-
|
|
|
|
| 170 |
# Gaussian Soft-NMS sigma. 0.5 is the textbook default — gentler
|
| 171 |
# than numberplate's 0.3 because beverage scenes are less crowded.
|
| 172 |
self.soft_nms_sigma = 0.5
|
| 173 |
# Final score floor after Soft-NMS decay.
|
| 174 |
self.score_threshold = 0.01
|
| 175 |
|
| 176 |
-
# Sane-box geometry filters.
|
| 177 |
-
#
|
| 178 |
-
#
|
| 179 |
-
# extreme aspect ratios). Values mirror the top miner's tuning.
|
| 180 |
self.min_box_area = 100 # px² (e.g. a 10x10 px box)
|
| 181 |
self.min_side = 8 # min(w, h) in pixels
|
| 182 |
self.max_aspect_ratio = 8.0 # max(w/h, h/w)
|
| 183 |
-
|
| 184 |
-
#
|
| 185 |
-
#
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
#
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
# GPU warmup — force ORT/CUDA/cuDNN kernel compilation before the
|
| 192 |
# first real validator frame. Mirrors the numberplate miner pattern.
|
|
@@ -424,6 +429,11 @@ class Miner:
|
|
| 424 |
# would otherwise survive Soft-NMS's gentle decay above the score floor.
|
| 425 |
dets = self._cluster_dedup(dets, iou_thresh=0.5)
|
| 426 |
dets = self._soft_nms(dets)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
|
| 428 |
out_boxes: list[BoundingBox] = []
|
| 429 |
for x1, y1, x2, y2, conf, cls_id in dets:
|
|
@@ -510,6 +520,17 @@ class Miner:
|
|
| 510 |
offset: int,
|
| 511 |
n_keypoints: int,
|
| 512 |
) -> list[TVFrameResult]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
results: list[TVFrameResult] = []
|
| 514 |
infer = self._infer_with_tta if self.use_tta else self._infer_single
|
| 515 |
for idx, image in enumerate(batch_images):
|
|
|
|
| 161 |
self.input_h = 960
|
| 162 |
self.input_w = 960
|
| 163 |
|
| 164 |
+
# Pre-NMS confidence threshold. v3 sets 0.55 to match the only
|
| 165 |
+
# working SN44-beverage miner (alfred8995). Their empirical tuning
|
| 166 |
+
# heavily prioritises the false_positive pillar (40% of composite);
|
| 167 |
+
# validator-side data shows their 0.55 threshold consistently
|
| 168 |
+
# delivers nonzero composites while looser thresholds (incl. our
|
| 169 |
+
# earlier 0.4 attempt) score 0.
|
| 170 |
+
self.conf_threshold = 0.55
|
| 171 |
# Gaussian Soft-NMS sigma. 0.5 is the textbook default — gentler
|
| 172 |
# than numberplate's 0.3 because beverage scenes are less crowded.
|
| 173 |
self.soft_nms_sigma = 0.5
|
| 174 |
# Final score floor after Soft-NMS decay.
|
| 175 |
self.score_threshold = 0.01
|
| 176 |
|
| 177 |
+
# Sane-box geometry filters. v3 keeps the alfred-aligned values
|
| 178 |
+
# (100 area / 8 side / 8 AR). Loosening them was a hypothesis
|
| 179 |
+
# that contradicted alfred's working empirical tuning.
|
|
|
|
| 180 |
self.min_box_area = 100 # px² (e.g. a 10x10 px box)
|
| 181 |
self.min_side = 8 # min(w, h) in pixels
|
| 182 |
self.max_aspect_ratio = 8.0 # max(w/h, h/w)
|
| 183 |
+
# Per-image detection cap. Mirrors alfred's max_det=150 — caps
|
| 184 |
+
# over-prediction in dense or noisy scenes that would otherwise
|
| 185 |
+
# tank the false_positive pillar.
|
| 186 |
+
self.max_det = 150
|
| 187 |
+
|
| 188 |
+
# Horizontal-flip TTA. DISABLED in v3 because the actual latency cap
|
| 189 |
+
# is RTF≤1.0 with service_rate_fps=1, which means p95 ≤ 5000 ms per
|
| 190 |
+
# /predict call. Empirical batch tests showed our chute fails the
|
| 191 |
+
# gate at 20+ frames per call. Halving inference cost (TTA off) gives
|
| 192 |
+
# ~2× headroom. Scoring impact: small recall loss (TTA usually adds
|
| 193 |
+
# +0.5–2% mAP), worth it to clear the gate.
|
| 194 |
+
self.use_tta = False
|
| 195 |
|
| 196 |
# GPU warmup — force ORT/CUDA/cuDNN kernel compilation before the
|
| 197 |
# first real validator frame. Mirrors the numberplate miner pattern.
|
|
|
|
| 429 |
# would otherwise survive Soft-NMS's gentle decay above the score floor.
|
| 430 |
dets = self._cluster_dedup(dets, iou_thresh=0.5)
|
| 431 |
dets = self._soft_nms(dets)
|
| 432 |
+
# Cap per-image detection count (mirrors alfred). Soft-NMS already
|
| 433 |
+
# returns dets sorted by descending decayed score, so [:max_det]
|
| 434 |
+
# keeps the top-confidence ones.
|
| 435 |
+
if len(dets) > self.max_det:
|
| 436 |
+
dets = dets[: self.max_det]
|
| 437 |
|
| 438 |
out_boxes: list[BoundingBox] = []
|
| 439 |
for x1, y1, x2, y2, conf, cls_id in dets:
|
|
|
|
| 520 |
offset: int,
|
| 521 |
n_keypoints: int,
|
| 522 |
) -> list[TVFrameResult]:
|
| 523 |
+
# v3 diagnostic: log batch_size to stderr so chute logs reveal what
|
| 524 |
+
# the validator actually sends per /predict call. Used to confirm
|
| 525 |
+
# whether the latency gate is the failure mode (large batches ⇒
|
| 526 |
+
# high p95). Cheap; one print per batch.
|
| 527 |
+
import sys as _sys
|
| 528 |
+
_sys.stderr.write(
|
| 529 |
+
f"[trace] predict_batch n={len(batch_images)} offset={offset} "
|
| 530 |
+
f"n_kp={n_keypoints} use_tta={self.use_tta}\n"
|
| 531 |
+
)
|
| 532 |
+
_sys.stderr.flush()
|
| 533 |
+
|
| 534 |
results: list[TVFrameResult] = []
|
| 535 |
infer = self._infer_with_tta if self.use_tta else self._infer_single
|
| 536 |
for idx, image in enumerate(batch_images):
|