# inference.py — person/head YOLOv11 detection demo
# (originally uploaded via huggingface_hub, revision 45f16b6)
import cv2
import numpy as np
import onnxruntime as ort
# --- Config (from model.phd.cfg) ---
# Mirrors the DeepStream nvinfer settings for the person/head YOLOv11 model.
MODEL_PATH = "yolov11_phd_s.onnx"  # ONNX export of the detector
LABEL_PATH = "../models/crowd_human.names"  # class names, one per line
IMAGE_PATH = "test_image.jpg"  # input image for this single-shot demo
CONF_THRESHOLD = 0.2 # pre-cluster-threshold: minimum per-class score kept
IOU_THRESHOLD = 0.6 # nms-iou-threshold: overlap above this is suppressed
NET_SCALE_FACTOR = 0.0039215697906911373 # net-scale-factor (≈1/255): uint8 → [0,1]
MODEL_COLOR_FORMAT = 0 # 0 = BGR (no channel swap) — NOTE(review): never read below
TOPK = 300 # topk: cap on detections returned after NMS
def load_labels(label_path):
    """Load class names from a newline-separated label file.

    Blank lines are skipped so trailing newlines don't produce empty labels.

    Args:
        label_path: Path to the .names file (one class name per line).

    Returns:
        List of non-empty, stripped label strings, in file order.
    """
    # Explicit encoding avoids platform-dependent defaults (e.g. cp1252 on Windows).
    with open(label_path, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]
def load_model(model_path):
    """Create an ONNX Runtime session and report its expected input geometry.

    Args:
        model_path: Path to the .onnx model file.

    Returns:
        (session, input_name, (height, width)) — the spatial dims come from
        the model's NCHW input tensor.
    """
    # Prefer GPU (gpu-id=0 in the DeepStream config) and fall back to CPU.
    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    session = ort.InferenceSession(model_path, providers=providers)

    first_input = session.get_inputs()[0]
    # Input tensor is NCHW; unpack only the spatial dimensions.
    _, _, height, width = first_input.shape
    return session, first_input.name, (height, width)
def preprocess(image, input_size):
    """Letterbox an image into the network's input tensor.

    Args:
        image: BGR uint8 image as loaded by cv2.imread, shape (H, W, 3).
        input_size: (height, width) expected by the network.

    Returns:
        (tensor, scale, pad_top, pad_left): float32 NCHW tensor plus the
        letterbox parameters needed to map detections back to the original
        image coordinates.
    """
    net_h, net_w = input_size
    src_h, src_w = image.shape[:2]

    # Uniform scale that fits the whole image inside the network canvas.
    ratio = min(net_w / src_w, net_h / src_h)
    scaled_w = int(src_w * ratio)
    scaled_h = int(src_h * ratio)
    scaled = cv2.resize(image, (scaled_w, scaled_h))

    # Grey (114) padding, centered — standard YOLO letterboxing.
    canvas = np.full((net_h, net_w, 3), 114, dtype=np.uint8)
    top = (net_h - scaled_h) // 2
    left = (net_w - scaled_w) // 2
    canvas[top:top + scaled_h, left:left + scaled_w] = scaled

    # model-color-format=0 → keep BGR ordering; just scale uint8 → [0, 1].
    blob = canvas.astype(np.float32) * NET_SCALE_FACTOR
    blob = blob.transpose(2, 0, 1)[np.newaxis, ...]  # HWC → NCHW with batch dim
    return blob, ratio, top, left
def postprocess(output, scale, pad_top, pad_left, conf_thresh, iou_thresh):
    """Decode raw YOLOv11 output into detections in original-image coordinates.

    YOLOv11 output shape is (1, 4 + num_classes, num_anchors); for the
    dual-class person/head model that is (1, 6, 8400).

    Args:
        output: List of model outputs from session.run; output[0] is the
            prediction tensor described above.
        scale, pad_top, pad_left: Letterbox parameters from preprocess().
        conf_thresh: Pre-NMS confidence threshold.
        iou_thresh: NMS IoU threshold.

    Returns:
        List of dicts with "bbox" (x1, y1, x2, y2 ints), "score" (float)
        and "class_id" (int), capped at TOPK entries.
    """
    preds = output[0][0].T            # (num_anchors, 4 + num_classes)
    boxes_raw = preds[:, :4]          # cx, cy, w, h in network coords
    class_scores = preds[:, 4:]       # one score per class per anchor

    # Best class per anchor, then confidence filter.
    class_ids = np.argmax(class_scores, axis=1)
    scores = class_scores[np.arange(len(class_scores)), class_ids]
    mask = scores >= conf_thresh
    boxes_raw = boxes_raw[mask]
    scores = scores[mask]
    class_ids = class_ids[mask]
    if len(scores) == 0:
        return []

    # cx,cy,w,h → corner coords, undoing the letterbox pad and scale.
    x1 = (boxes_raw[:, 0] - boxes_raw[:, 2] / 2 - pad_left) / scale
    y1 = (boxes_raw[:, 1] - boxes_raw[:, 3] / 2 - pad_top) / scale
    x2 = (boxes_raw[:, 0] + boxes_raw[:, 2] / 2 - pad_left) / scale
    y2 = (boxes_raw[:, 1] + boxes_raw[:, 3] / 2 - pad_top) / scale
    # cv2.dnn.NMSBoxes expects (x, y, w, h) boxes — the old name "boxes_xyxy"
    # was misleading; these rows are x, y, width, height.
    boxes_xywh = np.stack([x1, y1, x2 - x1, y2 - y1], axis=1).astype(int)

    # Class-agnostic NMS; indices come back sorted by score, so slicing
    # implements the topk cap. ravel() normalizes the (N,1) vs (N,) index
    # shapes returned by different OpenCV versions.
    indices = cv2.dnn.NMSBoxes(
        boxes_xywh.tolist(), scores.tolist(), conf_thresh, iou_thresh
    )
    results = []
    for idx in np.asarray(indices).ravel()[:TOPK]:
        idx = int(idx)
        x, y, w, h = boxes_xywh[idx]
        results.append({
            "bbox": (int(x), int(y), int(x + w), int(y + h)),
            "score": float(scores[idx]),
            "class_id": int(class_ids[idx]),
        })
    return results
def draw(image, detections, labels):
    """Render detection boxes and captions onto image (in place) and return it.

    Args:
        image: BGR image to annotate (modified in place).
        detections: List of dicts from postprocess().
        labels: Class-name list; falls back to "class<N>" if missing/short.

    Returns:
        The annotated image.
    """
    green = (0, 200, 0)
    for det in detections:
        x1, y1, x2, y2 = det["bbox"]
        cls = det["class_id"]
        # Guard against an empty or too-short label file.
        if labels and cls < len(labels):
            caption = labels[cls]
        else:
            caption = f"class{cls}"
        cv2.rectangle(image, (x1, y1), (x2, y2), green, 2)
        # Keep the caption on-screen even for boxes touching the top edge.
        cv2.putText(image, f"{caption} {det['score']:.2f}",
                    (x1, max(y1 - 8, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2)
    return image
# --- Main ---
def main():
    """Run single-image person/head inference end to end: load, detect, draw."""
    labels = load_labels(LABEL_PATH)
    session, input_name, input_size = load_model(MODEL_PATH)
    print(f"Model input size: {input_size}")

    image = cv2.imread(IMAGE_PATH)
    if image is None:
        # cv2.imread returns None on a missing/unreadable file instead of raising.
        raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

    tensor, scale, pad_top, pad_left = preprocess(image, input_size)
    outputs = session.run(None, {input_name: tensor})
    detections = postprocess(outputs, scale, pad_top, pad_left,
                             CONF_THRESHOLD, IOU_THRESHOLD)

    print(f"Detected {len(detections)} heads")
    for d in detections:
        print(f" BBox: {d['bbox']}, Score: {d['score']:.3f}")

    result = draw(image.copy(), detections, labels)
    cv2.imwrite("output.jpg", result)
    cv2.imshow("Detections", result)
    cv2.waitKey(0)
    cv2.destroyAllWindows()  # release the HighGUI window on exit


if __name__ == "__main__":
    main()