Person / inference.py

Upload folder using huggingface_hub

45f16b6 verified 6 days ago

4.7 kB

	import cv2
	import numpy as np
	import onnxruntime as ort

	# --- Config (from model.phd.cfg) ---
	# The trailing comment on each tunable names the config key it was copied from.
	MODEL_PATH = "yolov11_phd_s.onnx" # ONNX model file handed to load_model()
	LABEL_PATH = "../models/crowd_human.names" # text file of class names, one per line
	IMAGE_PATH = "test_image.jpg" # image the main section reads and annotates
	CONF_THRESHOLD = 0.2 # pre-cluster-threshold: minimum class score for a candidate box
	IOU_THRESHOLD = 0.6 # nms-iou-threshold: overlap threshold passed to NMS
	NET_SCALE_FACTOR = 0.0039215697906911373 # net-scale-factor (≈1/255): multiplies uint8 pixels in preprocess()
	# NOTE(review): NVIDIA's Gst-nvinfer documentation lists model-color-format
	# 0 as RGB and 1 as BGR; the note originally written here read
	# "0 = BGR (no channel swap)". Confirm which channel order the model expects.
	MODEL_COLOR_FORMAT = 0 # model-color-format
	TOPK = 300 # topk: maximum number of detections returned by postprocess()

	def load_labels(label_path):
	    """Read class names from a text file, one name per line.

	    Surrounding whitespace is stripped from every line and blank lines are
	    skipped, so the returned list index matches the model's class id only if
	    the file lists the classes in that order.

	    Args:
	        label_path: Path to the labels file.

	    Returns:
	        List of non-empty, stripped label strings in file order.

	    Raises:
	        OSError: If the file cannot be opened.
	    """
	    # Decode as UTF-8 explicitly so the result does not depend on the
	    # platform's locale default encoding.
	    with open(label_path, encoding="utf-8") as f:
	        stripped = (line.strip() for line in f)
	        return [name for name in stripped if name]

	def load_model(model_path):
	    """Create an ONNX Runtime session and report the model's input geometry.

	    Args:
	        model_path: Path to the ONNX model file.

	    Returns:
	        Tuple ``(session, input_name, (height, width))`` where ``input_name``
	        is the name of the model's first input and ``height``/``width`` are
	        taken from dimensions 2 and 3 of its declared shape (NCHW layout).

	    Raises:
	        ValueError: If the first input is not declared as a 4-D tensor, or if
	            its height/width are not fixed positive integers (for example a
	            model exported with dynamic axes). The letterbox step needs
	            concrete numbers, so this is reported here instead of failing
	            later with an unrelated arithmetic error.
	    """
	    session = ort.InferenceSession(
	        model_path,
	        # Providers are listed in priority order: CUDA first, CPU as fallback.
	        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
	    )
	    input_meta = session.get_inputs()[0]
	    shape = input_meta.shape

	    if len(shape) != 4:
	        raise ValueError(
	            f"Expected a 4-D NCHW input for '{input_meta.name}', got shape {shape}"
	        )

	    h, w = shape[2], shape[3]
	    # Dynamic dimensions are not reported as integers, so reject anything
	    # that is not a concrete positive size.
	    if not all(isinstance(dim, int) and dim > 0 for dim in (h, w)):
	        raise ValueError(
	            f"Input '{input_meta.name}' has non-fixed spatial size {shape}; "
	            "a static height and width are required"
	        )

	    return session, input_meta.name, (h, w)

	def preprocess(image, input_size):
	    """Letterbox-resize an image and convert it to a normalized NCHW tensor.

	    The image is scaled to fit inside ``input_size`` while keeping its aspect
	    ratio, centred on a canvas filled with the value 114, converted to the
	    channel order selected by ``MODEL_COLOR_FORMAT``, multiplied by
	    ``NET_SCALE_FACTOR`` and rearranged to ``(1, 3, H, W)`` float32.

	    Args:
	        image: HxWx3 uint8 array in BGR channel order (as returned by
	            ``cv2.imread``).
	        input_size: ``(height, width)`` expected by the model.

	    Returns:
	        Tuple ``(tensor, scale, pad_top, pad_left)``. ``scale`` is the resize
	        factor and ``pad_top``/``pad_left`` are the offsets of the resized
	        image inside the canvas; together they let the caller map model
	        coordinates back to the original image.
	    """
	    h_in, w_in = input_size
	    h_orig, w_orig = image.shape[:2]

	    # Letterbox scaling (preserves aspect ratio).
	    scale = min(w_in / w_orig, h_in / h_orig)
	    # Truncation can reach 0 for extremely thin images, which cv2.resize
	    # rejects, so keep at least one pixel per side.
	    new_w = max(1, int(w_orig * scale))
	    new_h = max(1, int(h_orig * scale))
	    resized = cv2.resize(image, (new_w, new_h))

	    # Pad to the model input size, centring the resized image.
	    canvas = np.full((h_in, w_in, 3), 114, dtype=np.uint8)
	    pad_top = (h_in - new_h) // 2
	    pad_left = (w_in - new_w) // 2
	    canvas[pad_top:pad_top + new_h, pad_left:pad_left + new_w] = resized

	    # model-color-format follows the Gst-nvinfer convention (0 = RGB, 1 = BGR).
	    # OpenCV decodes images as BGR, so a value of 0 requires a channel swap.
	    if MODEL_COLOR_FORMAT == 0:
	        canvas = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)

	    img = canvas.astype(np.float32) * NET_SCALE_FACTOR  # scale by net-scale-factor
	    img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
	    img = np.expand_dims(img, axis=0)  # add batch dimension

	    return img, scale, pad_top, pad_left

	def postprocess(output, scale, pad_top, pad_left, conf_thresh, iou_thresh):
	    """Decode raw model output into a list of detections in image coordinates.

	    The first element of ``output`` is expected to have shape
	    ``(1, 4 + num_classes, num_anchors)``; for the dual-class (person + head)
	    model that is ``(1, 6, 8400)``. Each anchor holds ``cx, cy, w, h`` followed
	    by one score per class.

	    Args:
	        output: Sequence of model outputs; only ``output[0]`` is used.
	        scale: Letterbox resize factor returned by ``preprocess``.
	        pad_top: Vertical letterbox offset returned by ``preprocess``.
	        pad_left: Horizontal letterbox offset returned by ``preprocess``.
	        conf_thresh: Minimum class score for a box to be considered.
	        iou_thresh: IoU threshold used by non-maximum suppression.

	    Returns:
	        List of at most ``TOPK`` dicts, ordered by descending score, each with
	        ``"bbox"`` (``(x1, y1, x2, y2)`` as Python ints in original-image
	        pixels), ``"score"`` (float) and ``"class_id"`` (int). Empty if no
	        anchor reaches ``conf_thresh``.
	    """
	    preds = output[0][0].T  # (num_anchors, 4 + num_classes): one row per anchor

	    boxes_raw = preds[:, :4]  # cx, cy, w, h
	    class_scores = preds[:, 4:]  # one score per class

	    # Best class per anchor.
	    class_ids = np.argmax(class_scores, axis=1)
	    scores = class_scores[np.arange(len(class_scores)), class_ids]

	    # Filter by confidence.
	    mask = scores >= conf_thresh
	    boxes_raw = boxes_raw[mask]
	    scores = scores[mask]
	    class_ids = class_ids[mask]

	    if len(scores) == 0:
	        return []

	    # Convert cx,cy,w,h -> corners and undo the letterbox (offset, then scale).
	    cx, cy, bw, bh = boxes_raw.T
	    x1 = (cx - bw / 2 - pad_left) / scale
	    y1 = (cy - bh / 2 - pad_top) / scale
	    x2 = (cx + bw / 2 - pad_left) / scale
	    y2 = (cy + bh / 2 - pad_top) / scale

	    # cv2.dnn.NMSBoxes expects boxes as (x, y, width, height).
	    boxes_xywh = np.stack([x1, y1, x2 - x1, y2 - y1], axis=1).astype(int)

	    # Run NMS separately for every class so that overlapping boxes of
	    # different classes (e.g. a head inside a tightly cropped person box)
	    # cannot suppress each other.
	    kept = []
	    for cls in np.unique(class_ids):
	        cls_idx = np.flatnonzero(class_ids == cls)
	        picked = cv2.dnn.NMSBoxes(
	            boxes_xywh[cls_idx].tolist(),
	            scores[cls_idx].tolist(),
	            conf_thresh,
	            iou_thresh,
	        )
	        # Depending on the OpenCV version the result is an empty tuple, an
	        # (N,) array or an (N, 1) array; flatten all of them to 1-D indices.
	        picked = np.asarray(picked, dtype=int).reshape(-1)
	        kept.extend(cls_idx[picked].tolist())

	    # Merge the per-class survivors by score before applying the top-k cap.
	    kept.sort(key=lambda i: scores[i], reverse=True)

	    results = []
	    for idx in kept[:TOPK]:
	        # .tolist() yields plain Python ints, which OpenCV drawing calls and
	        # serializers accept without relying on numpy scalar handling.
	        x, y, w, h = boxes_xywh[idx].tolist()
	        results.append({
	            "bbox": (x, y, x + w, y + h),
	            "score": float(scores[idx]),
	            "class_id": int(class_ids[idx]),
	        })
	    return results

	def draw(image, detections, labels):
	    """Annotate ``image`` in place with one box and caption per detection.

	    Each detection is a dict with ``"bbox"`` (x1, y1, x2, y2), ``"score"`` and
	    ``"class_id"``. The caption is the class name followed by the score with
	    two decimals; when ``labels`` is empty or has no entry for the class id,
	    ``class<id>`` is used as the name. Returns the same image object.
	    """
	    green = (0, 200, 0)
	    for det in detections:
	        left, top, right, bottom = det["bbox"]

	        # Fall back to a synthetic name when the id has no label entry.
	        if labels and det["class_id"] < len(labels):
	            name = labels[det["class_id"]]
	        else:
	            name = f"class{det['class_id']}"

	        cv2.rectangle(image, (left, top), (right, bottom), green, 2)

	        # Keep the caption inside the image when the box touches the top edge.
	        caption_origin = (left, max(top - 8, 0))
	        caption = f"{name} {det['score']:.2f}"
	        cv2.putText(image, caption, caption_origin,
	                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2)
	    return image

	# --- Main ---
	# Load labels and model, run one image through the pipeline, then print,
	# save and display the annotated result.
	labels = load_labels(LABEL_PATH)
	session, input_name, input_size = load_model(MODEL_PATH)
	print(f"Model input size: {input_size}")

	image = cv2.imread(IMAGE_PATH)
	# cv2.imread reports a missing or unreadable file by returning None rather
	# than raising, so fail here with a message that names the path.
	if image is None:
	    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")
	tensor, scale, pad_top, pad_left = preprocess(image, input_size)

	outputs = session.run(None, {input_name: tensor})

	detections = postprocess(outputs, scale, pad_top, pad_left,
	                         CONF_THRESHOLD, IOU_THRESHOLD)

	# Detections cover every class the model emits, so report them generically
	# and show the class of each one.
	print(f"Detected {len(detections)} objects")
	for d in detections:
	    class_id = d["class_id"]
	    name = labels[class_id] if class_id < len(labels) else f"class{class_id}"
	    print(f" BBox: {d['bbox']}, Score: {d['score']:.3f}, Class: {name}")

	# Draw on a copy so the decoded input image stays untouched.
	result = draw(image.copy(), detections, labels)
	cv2.imwrite("output.jpg", result)
	cv2.imshow("Detections", result)
	cv2.waitKey(0)
	cv2.destroyAllWindows()