import cv2
import numpy as np
import onnxruntime as ort
| MODEL_PATH = "yolov11_phd_s.onnx" |
| LABEL_PATH = "../models/crowd_human.names" |
| IMAGE_PATH = "test_image.jpg" |
| CONF_THRESHOLD = 0.2 |
| IOU_THRESHOLD = 0.6 |
| NET_SCALE_FACTOR = 0.0039215697906911373 |
| MODEL_COLOR_FORMAT = 0 |
| TOPK = 300 |
|
|
def load_labels(label_path):
    """Read class names from a text file, one name per line.

    Args:
        label_path: Path of the label file.

    Returns:
        list[str]: The stripped, non-empty lines in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # An explicit encoding keeps the result independent of the platform
    # locale; the "-sig" variant also drops a leading BOM so it cannot end
    # up inside the first label.
    with open(label_path, encoding="utf-8-sig") as f:
        return [name for name in (line.strip() for line in f) if name]
def load_model(model_path):
    """Create an ONNX Runtime session and report the model's input geometry.

    Args:
        model_path: Path of the ``.onnx`` file.

    Returns:
        tuple: ``(session, input_name, (height, width))``. ``input_name`` is
        the name of the model's first input; height and width are the last
        two entries of that input's declared 4-D shape.

    Raises:
        ValueError: If the first input is not 4-D, or if its height/width are
            not fixed positive integers.
    """
    session = ort.InferenceSession(
        model_path,
        # Listed in order of preference: CUDA first, CPU as the fallback.
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )
    input_meta = session.get_inputs()[0]

    shape = input_meta.shape
    if len(shape) != 4:
        raise ValueError(
            f"Expected a 4-D model input, got shape {shape!r}"
        )
    height, width = shape[2], shape[3]

    # A model exported with dynamic axes does not carry integer dims here
    # (presumably symbolic names or None - see the ONNX Runtime API docs).
    # Fail now with a clear message instead of deep inside the letterbox math.
    for dim in (height, width):
        if not isinstance(dim, int) or dim <= 0:
            raise ValueError(
                f"Model input has a non-fixed spatial size {shape!r}; "
                "export the model with a static input shape"
            )

    return session, input_meta.name, (height, width)
def preprocess(image, input_size):
    """Letterbox resize + normalize.

    The image is scaled (aspect ratio preserved) to fit inside the network
    input, centred on a gray canvas, multiplied by ``NET_SCALE_FACTOR`` and
    rearranged to a ``(1, 3, H, W)`` float32 tensor.

    Args:
        image: 3-channel ``H x W x 3`` uint8 array. Assumed to be in OpenCV's
            BGR channel order (the script obtains it from ``cv2.imread``).
        input_size: ``(height, width)`` of the network input.

    Returns:
        tuple: ``(tensor, scale, pad_top, pad_left)``. ``scale`` is the resize
        factor that was applied; ``pad_top`` / ``pad_left`` are the offsets of
        the resized image inside the canvas. Together they let the caller map
        network coordinates back to the original image.

    Raises:
        ValueError: If ``image`` is ``None`` or has a zero-sized dimension.
    """
    # cv2.imread signals failure by returning None rather than raising.
    if image is None:
        raise ValueError("image is None - was the image file read successfully?")

    h_in, w_in = input_size
    h_orig, w_orig = image.shape[:2]
    if h_orig == 0 or w_orig == 0:
        raise ValueError(f"image has an empty dimension: {image.shape!r}")

    # Uniform scale so the whole image fits inside the network input.
    scale = min(w_in / w_orig, h_in / h_orig)
    # int() truncates; clamp to 1 so an extreme aspect ratio cannot ask
    # cv2.resize for a 0-pixel side.
    new_w = max(1, int(w_orig * scale))
    new_h = max(1, int(h_orig * scale))
    resized = cv2.resize(image, (new_w, new_h))

    # Centre the resized image on a gray (114) canvas of the network size.
    canvas = np.full((h_in, w_in, 3), 114, dtype=np.uint8)
    pad_top = (h_in - new_h) // 2
    pad_left = (w_in - new_w) // 2
    canvas[pad_top:pad_top + new_h, pad_left:pad_left + new_w] = resized

    # NOTE(review): assumes MODEL_COLOR_FORMAT follows the DeepStream
    # convention (0 = RGB). The input is BGR, so reverse the channel axis
    # when the model wants RGB - confirm against the training pipeline.
    if MODEL_COLOR_FORMAT == 0:
        canvas = canvas[:, :, ::-1]

    img = canvas.astype(np.float32) * NET_SCALE_FACTOR
    img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
    img = np.expand_dims(img, axis=0)   # add the batch dimension
    # transpose() only returns a strided view; hand the runtime a packed buffer.
    img = np.ascontiguousarray(img)

    return img, scale, pad_top, pad_left
def postprocess(output, scale, pad_top, pad_left, conf_thresh, iou_thresh):
    """Decode raw network output into a list of detections.

    YOLOv11 output shape: (1, 4 + num_classes, num_anchors)
    For dual-class (person + head): (1, 6, 8400)

    Each anchor carries ``cx, cy, w, h`` followed by one score per class.
    The best class is picked per anchor, low-score anchors are dropped, the
    letterbox padding and scale are undone, and NMS removes duplicates.

    Args:
        output: Sequence of model outputs; only ``output[0]`` is used.
        scale: Resize factor that was applied during letterboxing.
        pad_top: Vertical letterbox offset in network pixels.
        pad_left: Horizontal letterbox offset in network pixels.
        conf_thresh: Minimum best-class score for a box to be kept.
        iou_thresh: IoU threshold for non-maximum suppression.

    Returns:
        list[dict]: At most ``TOPK`` entries, each with ``"bbox"`` as an
        ``(x1, y1, x2, y2)`` tuple of Python ints in original-image pixels,
        ``"score"`` as a float and ``"class_id"`` as an int. Empty when no
        anchor reaches ``conf_thresh``.
    """
    # (1, 4 + C, A) -> (A, 4 + C): one row per anchor.
    preds = np.asarray(output[0])[0].T

    boxes_raw = preds[:, :4]
    class_scores = preds[:, 4:]

    # Best class and its score for every anchor.
    class_ids = np.argmax(class_scores, axis=1)
    scores = class_scores[np.arange(len(class_scores)), class_ids]

    keep = scores >= conf_thresh
    if not np.any(keep):
        return []
    boxes_raw = boxes_raw[keep]
    scores = scores[keep]
    class_ids = class_ids[keep]

    # Centre/size -> corners, then undo the letterbox (padding first, scale
    # second) to land in original-image coordinates.
    half_w = boxes_raw[:, 2] / 2
    half_h = boxes_raw[:, 3] / 2
    x1 = (boxes_raw[:, 0] - half_w - pad_left) / scale
    y1 = (boxes_raw[:, 1] - half_h - pad_top) / scale
    x2 = (boxes_raw[:, 0] + half_w - pad_left) / scale
    y2 = (boxes_raw[:, 1] + half_h - pad_top) / scale

    # cv2.dnn.NMSBoxes takes boxes as (x, y, width, height).
    boxes_xywh = np.stack([x1, y1, x2 - x1, y2 - y1], axis=1).astype(int)

    # NOTE(review): this NMS ignores class ids, so a box can be suppressed by
    # an overlapping box of a different class - confirm that is intended.
    indices = cv2.dnn.NMSBoxes(
        boxes_xywh.tolist(), scores.tolist(), conf_thresh, iou_thresh
    )
    # The result may be nested (N, 1), flat (N,) or an empty tuple depending
    # on the OpenCV build; flatten once instead of inspecting every element.
    indices = np.asarray(indices, dtype=int).reshape(-1)

    results = []
    for idx in indices[:TOPK]:
        # Plain Python ints keep NumPy scalar types out of the public result.
        x, y, w, h = (int(v) for v in boxes_xywh[idx])
        results.append({
            "bbox": (x, y, x + w, y + h),
            "score": float(scores[idx]),
            "class_id": int(class_ids[idx]),
        })
    return results
def draw(image, detections, labels):
    """Draw every detection's box and caption onto ``image``.

    Args:
        image: Image array to draw on; it is modified in place.
        detections: Iterable of dicts with ``"bbox"`` (``x1, y1, x2, y2``),
            ``"score"`` and ``"class_id"`` keys.
        labels: Class names indexed by class id. May be empty; ids without a
            name are rendered as ``class<id>``.

    Returns:
        The same ``image`` object that was passed in.
    """
    color = (0, 200, 0)
    for det in detections:
        # Force plain ints so the drawing calls get the same coordinate type
        # no matter what numeric type the caller stored in the bbox.
        x1, y1, x2, y2 = (int(v) for v in det["bbox"])

        class_id = det["class_id"]
        # The lower bound stops a negative id from silently indexing the
        # label list from the end.
        if labels and 0 <= class_id < len(labels):
            label = labels[class_id]
        else:
            label = f"class{class_id}"

        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        cv2.putText(
            image,
            f"{label} {det['score']:.2f}",
            # Caption sits just above the box, clamped to the top edge.
            (x1, max(y1 - 8, 0)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            color,
            2,
        )
    return image
def main():
    """Run the detector on ``IMAGE_PATH`` and print, save and show the result.

    Raises:
        FileNotFoundError: If ``IMAGE_PATH`` cannot be read as an image.
    """
    labels = load_labels(LABEL_PATH)
    session, input_name, input_size = load_model(MODEL_PATH)
    print(f"Model input size: {input_size}")

    image = cv2.imread(IMAGE_PATH)
    # cv2.imread returns None instead of raising when the file is missing
    # or cannot be decoded.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")
    tensor, scale, pad_top, pad_left = preprocess(image, input_size)

    outputs = session.run(None, {input_name: tensor})

    detections = postprocess(outputs, scale, pad_top, pad_left,
                             CONF_THRESHOLD, IOU_THRESHOLD)

    # The list holds detections of every class, not only heads.
    print(f"Detected {len(detections)} objects")
    for d in detections:
        print(f"  BBox: {d['bbox']}, Score: {d['score']:.3f}")

    # Draw on a copy so the loaded image stays untouched.
    result = draw(image.copy(), detections, labels)
    # cv2.imwrite reports failure through its return value only.
    if not cv2.imwrite("output.jpg", result):
        print("Warning: failed to write output.jpg")
    cv2.imshow("Detections", result)
    cv2.waitKey(0)


# Guarded so importing this module does not load the model or open a window.
if __name__ == "__main__":
    main()