conf_threshold 0.25→0.55: composite +30% (FP/img 6.26→1.09, mAP50 71.8→66.5%)
3383aee verified | """ | |
| Score Vision SN44 — VehicleDetect miner endpoint. | |
| Class mapping (output indices): | |
| 0 = car (COCO class 2) | |
| 1 = bus (COCO class 5) | |
| 2 = truck (COCO class 7) | |
| 3 = motorcycle (COCO class 3) | |
| Accepts: base64-encoded image or raw image bytes via chutes cord. | |
| Returns: list of {bbox: [x1,y1,x2,y2], score: float, class_id: int, class_name: str} | |
| CUDA fix: onnxruntime-gpu finds cuDNN/cuBLAS via the LD_LIBRARY_PATH baked into the image at build time, | |
| with a ctypes preload as belt-and-suspenders fallback. | |
| """ | |
| from __future__ import annotations | |
| import base64 | |
| import io | |
| import os | |
| import time | |
| from pathlib import Path | |
| from typing import Any | |
| import ctypes | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| # ── cuDNN preload (belt-and-suspenders fallback) ────────────────────────────── | |
| # Primary fix is applied at image build time (see the Image builder below, which | |
| # bakes the cuDNN/cuBLAS directories into LD_LIBRARY_PATH). | |
| # This ctypes preload catches any edge cases where the dynamic loader still misses them. | |
def _preload_cuda_libs() -> None:
    """Best-effort preload of the CUDA shared libraries with ``RTLD_GLOBAL``.

    Each candidate path is loaded through ``ctypes.CDLL`` if it exists on
    disk.  Missing files and loader failures (``OSError``) are skipped
    silently, so calling this on a machine without these libraries is a no-op.
    """
    _NVIDIA = "/usr/local/lib/python3.12/dist-packages/nvidia"
    # Load order matters: the driver stub goes first, then cuBLAS, then cuDNN.
    preload_order = (
        "/usr/lib/x86_64-linux-gnu/libcuda.so.1",  # driver stub — must be first
        f"{_NVIDIA}/cublas/lib/libcublasLt.so.12",
        f"{_NVIDIA}/cublas/lib/libcublas.so.12",
        f"{_NVIDIA}/cudnn/lib/libcudnn.so.9",
    )
    for lib_path in preload_order:
        if not os.path.exists(lib_path):
            continue
        try:
            ctypes.CDLL(lib_path, mode=ctypes.RTLD_GLOBAL)
        except OSError:
            # Deliberately ignored: this preload is only a fallback.
            continue


# Must run before onnxruntime is imported.
_preload_cuda_libs()
| import onnxruntime as ort # noqa: E402 β must come after preload | |
| # ── Constants ──────────────────────────────────────────────────────────────── | |
# Weights are looked up next to this source file.
MODEL_DIR = Path(__file__).parent
WEIGHTS = MODEL_DIR.joinpath("weights.onnx")

# Side length (pixels) of the square network input.
IMG_SIZE = 640
# Sweep result: max composite score (0.60×mAP + 0.40×FP_score) at conf=0.55.
CONF_THRESH = 0.55
# IoU above which a lower-scoring box is suppressed during NMS.
IOU_THRESH = 0.45

# COCO class index → submission class index (insertion order is relied upon).
COCO_TO_OUT: dict[int, int] = {
    2: 0,
    5: 1,
    7: 2,
    3: 3,
}
COCO_VEHICLE_IDX = [*COCO_TO_OUT]
# Class names indexed by submission class index.
OUT_NAMES = [
    "car",
    "bus",
    "truck",
    "motorcycle",
]
| # ── Model loader (singleton) ───────────────────────────────────────────────── | |
# Process-wide cached session; populated on the first get_session() call.
_SESSION: ort.InferenceSession | None = None


def get_session() -> ort.InferenceSession:
    """Return the shared ONNX Runtime session, creating it on first use.

    The session is built from ``WEIGHTS`` with ``CUDAExecutionProvider``
    listed first and ``CPUExecutionProvider`` second.  After creation, the
    first provider reported by the session is printed once.

    Returns:
        The cached ``ort.InferenceSession``.
    """
    global _SESSION
    if _SESSION is not None:
        return _SESSION

    session_options = ort.SessionOptions()
    session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    session_options.enable_mem_pattern = True
    session_options.enable_mem_reuse = True

    cuda_provider_options = {
        "device_id": 0,
        "arena_extend_strategy": "kNextPowerOfTwo",
        "gpu_mem_limit": 2 * 1024 ** 3,  # 2 GiB
        "cudnn_conv_algo_search": "EXHAUSTIVE",
        "do_copy_in_default_stream": True,
    }
    provider_chain = [
        ("CUDAExecutionProvider", cuda_provider_options),
        "CPUExecutionProvider",
    ]

    _SESSION = ort.InferenceSession(
        str(WEIGHTS),
        sess_options=session_options,
        providers=provider_chain,
    )
    provider = _SESSION.get_providers()[0]
    print(f"[miner] Model loaded. Provider: {provider}", flush=True)
    return _SESSION
| # ── Preprocessing ──────────────────────────────────────────────────────────── | |
def letterbox(img: np.ndarray, size: int = IMG_SIZE) -> tuple[np.ndarray, float, int, int]:
    """Resize ``img`` to fit inside a ``size`` x ``size`` square and pad the rest.

    The image is scaled by a single ratio so that its longer side becomes
    ``size``, then padded with the constant colour (114, 114, 114), with the
    padding split between both sides (any odd pixel goes to the right/bottom).

    Args:
        img: Image array whose first two dimensions are height and width.
        size: Side length of the square output, in pixels.

    Returns:
        ``(padded_image, ratio, pad_left, pad_top)`` where ``ratio`` is the
        scale applied to the original image and the pads are in pixels.

    Raises:
        ValueError: If the image has zero height or width.
    """
    h, w = img.shape[:2]
    if h == 0 or w == 0:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError(f"letterbox: empty image with shape {img.shape}")
    r = min(size / h, size / w)
    # For extreme aspect ratios the short side can round to 0 px, which
    # cv2.resize rejects; keep at least one pixel in each dimension.
    new_w = max(1, int(round(w * r)))
    new_h = max(1, int(round(h * r)))
    img_r = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    dw, dh = size - new_w, size - new_h
    pad_l, pad_t = dw // 2, dh // 2
    img_p = cv2.copyMakeBorder(
        img_r, pad_t, dh - pad_t, pad_l, dw - pad_l,
        cv2.BORDER_CONSTANT, value=(114, 114, 114),
    )
    return img_p, r, pad_l, pad_t
def preprocess(img_bgr: np.ndarray) -> tuple[np.ndarray, float, int, int]:
    """Turn a BGR image into the float32 batch tensor fed to the model.

    Steps: letterbox, BGR→RGB, HWC→CHW, scale by 1/255 as float32, then add
    a leading batch axis and make the result contiguous.

    Returns:
        ``(batch, ratio, pad_left, pad_top)``; the last three values are passed
        through unchanged from ``letterbox``.
    """
    padded, ratio, pad_l, pad_t = letterbox(img_bgr)
    channels_first = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB).transpose(2, 0, 1)
    scaled = channels_first.astype(np.float32) * (1.0 / 255.0)
    batch = np.ascontiguousarray(scaled[np.newaxis])
    return batch, ratio, pad_l, pad_t
| # ── NMS ────────────────────────────────────────────────────────────────────── | |
def nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float = IOU_THRESH) -> list[int]:
    """Greedy non-maximum suppression over ``[x1, y1, x2, y2]`` boxes.

    Boxes are visited in descending score order.  Each visited box is kept,
    and every remaining box whose IoU with it exceeds ``iou_thresh`` is dropped.

    Args:
        boxes: Array whose columns 0..3 are x1, y1, x2, y2.
        scores: One score per box.
        iou_thresh: Boxes with IoU strictly above this value are suppressed.

    Returns:
        Indices into ``boxes`` of the kept boxes, highest score first.
    """
    if len(boxes) == 0:
        return []

    left, top, right, bottom = (boxes[:, col] for col in range(4))
    areas = (right - left) * (bottom - top)

    remaining = scores.argsort()[::-1]
    selected: list[int] = []
    while len(remaining):
        best, rest = remaining[0], remaining[1:]
        selected.append(int(best))

        # Intersection rectangle between the best box and every other candidate.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])
        inter = np.maximum(0, inter_right - inter_left) * np.maximum(0, inter_bottom - inter_top)

        # The small epsilon keeps the division defined for zero-area boxes.
        iou = inter / (areas[best] + areas[rest] - inter + 1e-7)
        remaining = rest[iou <= iou_thresh]
    return selected
| # ── Postprocessing ─────────────────────────────────────────────────────────── | |
def postprocess(
    raw: np.ndarray,
    ratio: float,
    pad_l: int,
    pad_t: int,
    orig_w: int,
    orig_h: int,
) -> list[dict[str, Any]]:
    """Convert a raw model output into vehicle detections in image coordinates.

    Rows 0..3 of ``raw`` are read as box centre x, centre y, width and height
    in letterboxed pixels; row ``4 + c`` is read as the score for COCO class
    ``c``.  The caller documents the layout as ``[84, 8400]``.

    Args:
        raw: Model output for one image, indexed ``[row, candidate]``.
        ratio: Scale factor that was applied by the letterbox step.
        pad_l: Left padding (pixels) added by the letterbox step.
        pad_t: Top padding (pixels) added by the letterbox step.
        orig_w: Width of the original image; boxes are clipped to it.
        orig_h: Height of the original image; boxes are clipped to it.

    Returns:
        A list of dicts with keys ``bbox`` (``[x1, y1, x2, y2]``), ``score``,
        ``class_id`` and ``class_name``.  NMS is run per class, so results
        are grouped by class in ``COCO_VEHICLE_IDX`` order.
    """
    # Cheap pre-filter: keep candidates where any vehicle class clears the threshold.
    vehicle_rows = np.array([4 + c for c in COCO_VEHICLE_IDX])
    candidate_mask = raw[vehicle_rows].max(axis=0) > CONF_THRESH
    if not candidate_mask.any():
        return []
    pred_f = raw[:, candidate_mask]

    # Undo the letterbox (remove padding, then rescale) and clip to the image.
    cx, cy, bw, bh = pred_f[0], pred_f[1], pred_f[2], pred_f[3]
    x1 = np.clip((cx - bw / 2 - pad_l) / ratio, 0, orig_w)
    y1 = np.clip((cy - bh / 2 - pad_t) / ratio, 0, orig_h)
    x2 = np.clip((cx + bw / 2 - pad_l) / ratio, 0, orig_w)
    y2 = np.clip((cy + bh / 2 - pad_t) / ratio, 0, orig_h)
    boxes = np.stack([x1, y1, x2, y2], axis=1)

    results: list[dict[str, Any]] = []
    for coco_cls in COCO_VEHICLE_IDX:
        scores = pred_f[4 + coco_cls]
        cls_mask = scores > CONF_THRESH
        if not cls_mask.any():
            continue
        # Materialise the per-class subsets once rather than once per kept box.
        cls_boxes = boxes[cls_mask]
        cls_scores = scores[cls_mask]
        out_cls = COCO_TO_OUT[coco_cls]
        class_name = OUT_NAMES[out_cls]
        for k in nms(cls_boxes, cls_scores):
            box = cls_boxes[k]
            results.append({
                "bbox": [
                    float(box[0]), float(box[1]),
                    float(box[2]), float(box[3]),
                ],
                "score": float(cls_scores[k]),
                "class_id": out_cls,
                "class_name": class_name,
            })
    return results
| # ── Image decoding helpers ─────────────────────────────────────────────────── | |
def decode_image(data: bytes | str) -> np.ndarray:
    """Decode an encoded image payload into a BGR array.

    Args:
        data: Either base64 text (``str``, optionally a
            ``data:<mime>;base64,<payload>`` URL) or ``bytes``/``bytearray``
            holding raw encoded image bytes or base64-encoded bytes.

    Returns:
        The image decoded by ``cv2.imdecode(..., cv2.IMREAD_COLOR)``.  If
        OpenCV cannot decode it, the image is decoded by Pillow and converted
        from RGB to BGR.

    Raises:
        ValueError: If the payload is empty, or if a ``str`` payload is not
            valid base64 (``binascii.Error`` is a ``ValueError`` subclass).
        OSError: If the Pillow fallback cannot read the image either.
    """
    if isinstance(data, str):
        text = data
        if text.startswith("data:"):
            # Drop the "data:<mime>;base64," header of a data URL.
            text = text.partition(",")[2]
        payload = base64.b64decode(text)
    elif isinstance(data, (bytes, bytearray)):
        payload = bytes(data)
        try:
            # Strict validation: the lenient decoder silently discards
            # non-alphabet bytes and can "succeed" on a raw image, corrupting
            # it.  Whitespace is stripped first so line-wrapped base64 is
            # still accepted.
            payload = base64.b64decode(b"".join(payload.split()), validate=True)
        except ValueError:
            # Not base64: treat the bytes as the raw encoded image.
            pass
    else:
        # Any other buffer-like object is handed to NumPy unchanged.
        payload = data

    if not len(payload):
        raise ValueError("decode_image: empty image payload")

    arr = np.frombuffer(payload, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        # OpenCV could not decode the buffer; fall back to Pillow.
        pil = Image.open(io.BytesIO(payload)).convert("RGB")
        img = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)
    return img
| # ── Core predict function ──────────────────────────────────────────────────── | |
def predict(image_data: bytes | str | np.ndarray) -> dict[str, Any]:
    """Run the detector on one image and return detections plus timing.

    Args:
        image_data: A ready BGR array (used as-is), or an encoded image /
            base64 payload that is passed to ``decode_image``.

    Returns:
        A dict with ``detections`` (the output of ``postprocess``),
        ``inference_ms`` (time spent in the session ``run`` call only, rounded
        to 3 decimals) and ``provider`` (first provider reported by the session).
    """
    sess = get_session()

    frame = image_data if isinstance(image_data, np.ndarray) else decode_image(image_data)
    orig_h, orig_w = frame.shape[:2]
    tensor, ratio, pad_l, pad_t = preprocess(frame)

    # Only the model call is timed; decoding and pre/post-processing are excluded.
    started = time.perf_counter()
    outputs = sess.run(None, {"images": tensor})
    elapsed_ms = (time.perf_counter() - started) * 1000.0

    first_image_output = outputs[0][0]  # [84, 8400]
    detections = postprocess(first_image_output, ratio, pad_l, pad_t, orig_w, orig_h)

    response: dict[str, Any] = {
        "detections": detections,
        "inference_ms": round(elapsed_ms, 3),
        "provider": sess.get_providers()[0],
    }
    return response
| # ── Chutes cord wrapper ────────────────────────────────────────────────────── | |
# Deployment definitions are optional: if the chutes package is not installed,
# this whole section is skipped and the module still works for local use.
try:
    from chutes.chute import Chute
    from chutes.chute.node_selector import NodeSelector
    from chutes.image import Image as ChuteImage

    # Build the container image step by step; each builder call returns the
    # object that the next step is applied to.
    chute_image = ChuteImage(
        username="lculpitt",
        name="vehicle-detect-sn44",
        tag="v4-cuda",
        readme=Path(__file__).with_name("README.md").read_text(),
    )
    chute_image = chute_image.from_base("parachutes/python:3.12")
    chute_image = chute_image.run_command("pip install --upgrade setuptools wheel")
    chute_image = chute_image.run_command(
        "pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' "
        "'opencv-python-headless>=4.7' 'pillow>=9.5' "
        "'huggingface_hub>=0.19.4' 'pydantic>=2.0' "
        "'pyyaml>=6.0' 'aiohttp>=3.9'"
    )
    # Bake the cuDNN/cuBLAS directories into the image environment so the
    # onnxruntime CUDA provider can locate libcudnn.so.9 at container start.
    chute_image = chute_image.with_env(
        "LD_LIBRARY_PATH",
        "/usr/local/lib/python3.12/dist-packages/nvidia/cudnn/lib"
        ":/usr/local/lib/python3.12/dist-packages/nvidia/cublas/lib",
    )

    # GPU requirements: a single GPU with at least 16 GB of VRAM, restricted
    # to the listed models (all CUDA 12.x, roughly $0.40–$0.85/hr).
    gpu_selector = NodeSelector(
        gpu_count=1,
        min_vram_gb_per_gpu=16,
        include=["4090", "a40", "a6000", "l40", "l40s"],
    )

    chute = Chute(
        username="lculpitt",
        name="vehicle-detect-sn44",
        tagline="YOLO11n vehicle detector β car, bus, truck, motorcycle",
        readme=Path(__file__).with_name("README.md").read_text(),
        image=chute_image,
        concurrency=4,
        max_instances=5,
        shutdown_after_seconds=300,
        scaling_threshold=0.5,
        node_selector=gpu_selector,
    )

    async def predict_cord(image_b64: str) -> dict:
        """
        POST /predict
        Body: {"image_b64": "<base64-encoded image>"}
        Returns the detection JSON produced by predict().
        """
        return predict(image_b64)
except ImportError:
    pass
| # ── Local test ─────────────────────────────────────────────────────────────── | |
if __name__ == "__main__":
    # Local smoke test: one prediction, then a 50-run latency benchmark on the
    # same image.  An optional image path can be given as the first argument.
    import sys

    print("=" * 55)
    print(" miner.py β local smoke test")
    print("=" * 55)

    # Default input: a grey 1280x720 frame with a green rectangle outline.
    frame = np.full((720, 1280, 3), 128, dtype=np.uint8)
    cv2.rectangle(frame, (100, 100), (400, 300), (0, 255, 0), 3)

    if len(sys.argv) <= 1:
        print(" Using synthetic 1280x720 dummy image.")
    else:
        loaded = cv2.imread(sys.argv[1])
        if loaded is None:
            print(f" Could not load {sys.argv[1]}, using dummy.")
        else:
            frame = loaded
            print(f" Using image: {sys.argv[1]} ({loaded.shape[1]}x{loaded.shape[0]})")

    # This first call also loads the model, so the benchmark below runs warm.
    result = predict(frame)
    print(f"\n Provider : {result['provider']}")
    print(f" Inference : {result['inference_ms']:.2f} ms")
    print(f" Detections : {len(result['detections'])}")
    for d in result["detections"]:
        x1, y1, x2, y2 = (round(coord, 1) for coord in d["bbox"])
        print(f" [{d['class_id']}] {d['class_name']:12s} score={d['score']:.3f} "
              f"bbox=[{x1},{y1},{x2},{y2}]")

    print("\n Latency benchmark (50 runs)...")
    latencies_ms = []
    while len(latencies_ms) < 50:
        started = time.perf_counter()
        predict(frame)
        latencies_ms.append((time.perf_counter() - started) * 1000)

    # End-to-end latency per predict() call, in milliseconds, ascending.
    ordered = sorted(latencies_ms)
    p50 = ordered[25]
    p95 = ordered[47]
    fps = 1000.0 / p50
    print(f" P50={p50:.2f}ms P95={p95:.2f}ms FPS={fps:.1f}")
    print(f" Target >=30 FPS : {'PASS' if fps >= 30 else 'FAIL'}")
    print(f" Target P95<50ms : {'PASS' if p95 < 50 else 'FAIL'}")
    print("=" * 55)