meaculpitt committed on
Commit
e2fb1ca
·
verified ·
1 Parent(s): 122de2b

scorevision: push artifact

Browse files
Files changed (1) hide show
  1. miner.py +38 -17
miner.py CHANGED
@@ -161,32 +161,37 @@ class Miner:
161
  self.input_h = 960
162
  self.input_w = 960
163
 
164
- # Pre-NMS confidence threshold. Raised from 0.15 → 0.4 to align with
165
- # the SN44 beverage scoring pillar weights (map50 0.6 + false_positive
166
- # 0.4); the FP pillar penalises over-prediction so a higher conf
167
- # threshold trades a little recall for a meaningful precision gain.
168
- # Matches the conf_thres tuning of the current top miner (alfred8995).
169
- self.conf_threshold = 0.4
 
170
  # Gaussian Soft-NMS sigma. 0.5 is the textbook default — gentler
171
  # than numberplate's 0.3 because beverage scenes are less crowded.
172
  self.soft_nms_sigma = 0.5
173
  # Final score floor after Soft-NMS decay.
174
  self.score_threshold = 0.01
175
 
176
- # Sane-box geometry filters. Spurious detections are a major source of
177
- # false_positive pillar damage; these constraints reject obviously
178
- # non-beverage geometry (1×1 pixel "boxes", needle-thin slivers,
179
- # extreme aspect ratios). Values mirror the top miner's tuning.
180
  self.min_box_area = 100 # 10x10 px²
181
  self.min_side = 8 # min(w, h) in pixels
182
  self.max_aspect_ratio = 8.0 # max(w/h, h/w)
183
-
184
- # Horizontal-flip TTA. Runs inference twice (original + h-flipped),
185
- # un-flips the x-coords on the flipped view, merges via cluster_dedup
186
- # to suppress duplicates. Doubles inference cost but the validator
187
- # latency cap is 10s and our single-pass inference is ~10ms, so we
188
- # have ~1000× headroom.
189
- self.use_tta = True
 
 
 
 
 
190
 
191
  # GPU warmup — force ORT/CUDA/cuDNN kernel compilation before the
192
  # first real validator frame. Mirrors the numberplate miner pattern.
@@ -424,6 +429,11 @@ class Miner:
424
  # would otherwise survive Soft-NMS's gentle decay above the score floor.
425
  dets = self._cluster_dedup(dets, iou_thresh=0.5)
426
  dets = self._soft_nms(dets)
 
 
 
 
 
427
 
428
  out_boxes: list[BoundingBox] = []
429
  for x1, y1, x2, y2, conf, cls_id in dets:
@@ -510,6 +520,17 @@ class Miner:
510
  offset: int,
511
  n_keypoints: int,
512
  ) -> list[TVFrameResult]:
 
 
 
 
 
 
 
 
 
 
 
513
  results: list[TVFrameResult] = []
514
  infer = self._infer_with_tta if self.use_tta else self._infer_single
515
  for idx, image in enumerate(batch_images):
 
161
  self.input_h = 960
162
  self.input_w = 960
163
 
164
+ # Pre-NMS confidence threshold. v3 sets 0.55 to match the only
165
+ # working SN44-beverage miner (alfred8995). Their empirical tuning
166
+ # heavily prioritises the false_positive pillar (40% of composite);
167
+ # validator-side data shows their 0.55 threshold consistently
168
+ # delivers nonzero composites while looser thresholds (incl. our
169
+ # earlier 0.4 attempt) score 0.
170
+ self.conf_threshold = 0.55
171
  # Gaussian Soft-NMS sigma. 0.5 is the textbook default — gentler
172
  # than numberplate's 0.3 because beverage scenes are less crowded.
173
  self.soft_nms_sigma = 0.5
174
  # Final score floor after Soft-NMS decay.
175
  self.score_threshold = 0.01
176
 
177
+ # Sane-box geometry filters. v3 keeps the alfred-aligned values
178
+ # (100 area / 8 side / 8 AR). Loosening them was a hypothesis
179
+ # that contradicted alfred's working empirical tuning.
 
180
  self.min_box_area = 100 # 10x10 px²
181
  self.min_side = 8 # min(w, h) in pixels
182
  self.max_aspect_ratio = 8.0 # max(w/h, h/w)
183
+ # Per-image detection cap. Mirrors alfred's max_det=150 — caps
184
+ # over-prediction in dense or noisy scenes that would otherwise
185
+ # tank the false_positive pillar.
186
+ self.max_det = 150
187
+
188
+ # Horizontal-flip TTA. DISABLED in v3 because the actual latency cap
189
+ # is RTF≤1.0 with service_rate_fps=1, which means p95 ≤ 5000 ms per
190
+ # /predict call. Empirical batch tests showed our chute fails the
191
+ # gate at 20+ frames per call. Halving inference cost (TTA off) gives
192
+ # ~2× headroom. Scoring impact: small recall loss (TTA usually adds
193
+ # +0.5–2% mAP), worth it to clear the gate.
194
+ self.use_tta = False
195
 
196
  # GPU warmup — force ORT/CUDA/cuDNN kernel compilation before the
197
  # first real validator frame. Mirrors the numberplate miner pattern.
 
429
  # would otherwise survive Soft-NMS's gentle decay above the score floor.
430
  dets = self._cluster_dedup(dets, iou_thresh=0.5)
431
  dets = self._soft_nms(dets)
432
+ # Cap per-image detection count (mirrors alfred). Soft-NMS already
433
+ # returns dets sorted by descending decayed score, so [:max_det]
434
+ # keeps the top-confidence ones.
435
+ if len(dets) > self.max_det:
436
+ dets = dets[: self.max_det]
437
 
438
  out_boxes: list[BoundingBox] = []
439
  for x1, y1, x2, y2, conf, cls_id in dets:
 
520
  offset: int,
521
  n_keypoints: int,
522
  ) -> list[TVFrameResult]:
523
+ # v3 diagnostic: log batch_size to stderr so chute logs reveal what
524
+ # the validator actually sends per /predict call. Used to confirm
525
+ # whether the latency gate is the failure mode (large batches ⇒
526
+ # high p95). Cheap; one print per batch.
527
+ import sys as _sys
528
+ _sys.stderr.write(
529
+ f"[trace] predict_batch n={len(batch_images)} offset={offset} "
530
+ f"n_kp={n_keypoints} use_tta={self.use_tta}\n"
531
+ )
532
+ _sys.stderr.flush()
533
+
534
  results: list[TVFrameResult] = []
535
  infer = self._infer_with_tta if self.use_tta else self._infer_single
536
  for idx, image in enumerate(batch_images):