Spaces:

akhaliq
/

Kapao

Runtime error

Kapao / val.py

AK391

add files

e6e7cb5 over 3 years ago

14.7 kB

	import argparse
	import json
	import os, os.path as osp
	import sys
	from pathlib import Path

	import numpy as np
	import torch
	from tqdm import tqdm

	# Absolute path of this script. Its directory is appended to sys.path so the
	# project-local `models` and `utils` packages imported below can be resolved.
	FILE = Path(__file__).absolute()
	sys.path.append(FILE.parent.as_posix())

	from models.experimental import attempt_load
	from utils.datasets import create_dataloader
	from utils.augmentations import letterbox
	from utils.general import check_dataset, check_file, check_img_size, \
	non_max_suppression_kp, scale_coords, set_logging, colorstr
	from utils.torch_utils import select_device, time_sync
	import tempfile
	import cv2

	# Border / letterbox fill colour for person crops: gray level 114 on all three
	# channels, expressed on the 0-1 scale (crops are cut from images already divided by 255).
	PAD_COLOR = (114 / 255,) * 3


	def run_nms(data, model_out):
	    """Run keypoint-aware NMS and split the result into person and keypoint-object detections.

	    When the person and keypoint IoU thresholds are equal and the keypoint
	    confidence threshold is not lower than the person one, a single NMS pass is
	    run with the person thresholds and its output is split afterwards. Otherwise
	    two independent passes are run, one restricted to class 0 (persons) and one
	    restricted to classes 1..len(data['kp_flip']) (keypoint objects).

	    Args:
	        data: settings dict; reads 'conf_thres', 'iou_thres', 'conf_thres_kp',
	            'iou_thres_kp', 'num_coords' and (two-pass case only) 'kp_flip'.
	        model_out: raw model output, forwarded unchanged to non_max_suppression_kp.

	    Returns:
	        (person_dets, kp_dets): two sequences with one entry per image.
	    """
	    single_pass = data['iou_thres'] == data['iou_thres_kp'] and data['conf_thres_kp'] >= data['conf_thres']

	    if not single_pass:
	        # Thresholds are incompatible: suppress persons and keypoint objects separately.
	        person_dets = non_max_suppression_kp(
	            model_out, data['conf_thres'], data['iou_thres'],
	            classes=[0],
	            num_coords=data['num_coords'])
	        kp_dets = non_max_suppression_kp(
	            model_out, data['conf_thres_kp'], data['iou_thres_kp'],
	            classes=list(range(1, 1 + len(data['kp_flip']))),
	            num_coords=data['num_coords'])
	        return person_dets, kp_dets

	    # Combined NMS saves ~0.2 ms / image
	    merged = non_max_suppression_kp(model_out, data['conf_thres'], data['iou_thres'], num_coords=data['num_coords'])

	    person_dets, kp_dets = [], []
	    for det in merged:
	        # column 5 == 0 selects the person rows
	        person_dets.append(det[det[:, 5] == 0])
	        # keypoint objects: column 4 must reach the keypoint threshold, column 5 must be > 0
	        confident = det[det[:, 4] >= data['conf_thres_kp']]
	        kp_dets.append(confident[confident[:, 5] > 0])
	    return person_dets, kp_dets


	def post_process_batch(data, imgs, paths, shapes, person_dets, kp_dets,
	two_stage=False, pad=0, device='cpu', model=None, origins=None):
	"""Turn per-image person / keypoint-object detections into poses, scores and image ids.

	For every image that has at least one person detection, the pose is read from
	the last data['num_coords'] columns of each person detection, rescaled with
	scale_coords and extended with a third, zero-initialised column per keypoint.
	If data['use_kp_dets'] is set, individual keypoints of sufficiently confident
	persons may be overwritten by nearby keypoint-object detections ("fusion").
	With two_stage=True each person box is instead cropped, padded, letterboxed,
	run through `model` again and post-processed by a recursive call.

	NOTE(review): the nesting indentation of this function was lost in this copy
	of the file. The comments below describe individual statements; the exact
	nesting must be confirmed against the upstream source.

	Args:
	data: settings dict. Keys read here: 'num_coords', 'imgsz', 'kp_flip',
	'scales', 'flips', 'use_kp_dets', 'conf_thres_kp_person', 'overwrite_tol'.
	imgs: batch of images; imgs[si] is sliced as [:, y1:y2, x1:x2] and its
	shape[1:] is passed to scale_coords (presumably CHW tensors - TODO confirm).
	paths: image paths. The file stem is parsed as an int image id; when
	`paths` is empty the batch index is used instead.
	shapes: per-image shape info; shapes[si][0] is the target shape given to scale_coords.
	person_dets: per-image person detections (see run_nms).
	kp_dets: per-image keypoint-object detections (see run_nms).
	two_stage: enable the experimental crop-and-re-infer path.
	pad: border width in pixels added around each crop (two-stage only).
	device: device the crop tensor is moved to (two-stage only).
	model: model used for re-inference and for its stride (two-stage only).
	origins: per-image (x, y, 0) offsets added to every pose; zeros when None.

	Returns:
	Tuple (batch_bboxes, batch_poses, batch_scores, batch_ids, n_fused) where
	n_fused holds one counter per keypoint of how many overwrites happened.
	"""

	batch_bboxes, batch_poses, batch_scores, batch_ids = [], [], [], []
	# one fusion counter per keypoint (num_coords counts x and y separately)
	n_fused = np.zeros(data['num_coords'] // 2)

	if origins is None: # used only for two-stage inference so set to 0 if None
	origins = [np.array([0, 0, 0]) for _ in range(len(person_dets))]

	# process each image in batch
	for si, (pd, kpd, origin) in enumerate(zip(person_dets, kp_dets, origins)):
	nd = pd.shape[0]
	nkp = kpd.shape[0]

	if nd:
	# parses as (Path(...) if len(paths) else ''), shapes[si][0]
	path, shape = Path(paths[si]) if len(paths) else '', shapes[si][0]
	# image id = integer file stem; falls back to the batch index when no path is given
	img_id = int(osp.splitext(osp.split(path)[-1])[0]) if path else si

	# TWO-STAGE INFERENCE (EXPERIMENTAL)
	if two_stage:
	gs = max(int(model.stride.max()), 32) # grid size (max stride)
	# NOTE(review): this rebinds the `origins` parameter name; `crops` is never
	# appended to and `crop_shapes` is never read in this function.
	crops, origins, crop_shapes = [], [], []

	for bbox in pd[:, :4].cpu().numpy():
	x1, y1, x2, y2 = map(int, map(round, bbox))
	# clamp the box to [0, imgsz] on both axes
	x1, x2 = max(x1, 0), min(x2, data['imgsz'])
	y1, y2 = max(y1, 0), min(y2, data['imgsz'])
	h0, w0 = y2 - y1, x2 - x1
	crop_shapes.append([(h0, w0)])
	# cut the crop and reorder its axes with (1, 2, 0) for OpenCV
	crop = np.transpose(imgs[si][:, y1:y2, x1:x2].cpu().numpy(), (1, 2, 0))
	crop = cv2.copyMakeBorder(crop, pad, pad, pad, pad, cv2.BORDER_CONSTANT, value=PAD_COLOR) # add padding
	h0 += 2 * pad
	w0 += 2 * pad
	# top-left corner of the padded crop; passed as the origin offset to the recursive call
	origins = [np.array([x1 - pad, y1 - pad, 0])]
	crop_pre = letterbox(crop, data['imgsz'], color=PAD_COLOR, stride=gs, auto=False)[0]
	# add a batch axis and reorder to (0, 3, 1, 2) before moving to `device`
	crop_input = torch.Tensor(np.transpose(np.expand_dims(crop_pre, axis=0), (0, 3, 1, 2))).to(device)

	out = model(crop_input, augment=True, kp_flip=data['kp_flip'], scales=data['scales'], flips=data['flips'])[0]
	# NOTE(review): rebinds the person_dets / kp_dets parameter names
	person_dets, kp_dets = run_nms(data, out)
	# recursive call uses the default two_stage=False; the returned img_ids is not used afterwards
	_, poses, scores, img_ids, _ = post_process_batch(
	data, crop_input, paths, [[(h0, w0)]], person_dets, kp_dets, device=device, origins=origins)

	# map back to original image
	if len(poses):
	poses = np.stack(poses, axis=0)
	# keep only x, y and flatten per pose so scale_coords can rescale them
	poses = poses[:, :, :2].reshape(poses.shape[0], -1)
	poses = scale_coords(imgs[si].shape[1:], poses, shape)
	poses = poses.reshape(poses.shape[0], data['num_coords'] // 2, 2)
	# re-append the third column as zeros
	poses = np.concatenate((poses, np.zeros((poses.shape[0], data['num_coords'] // 2, 1))), axis=-1)
	poses = [p for p in poses] # convert back to list

	# SINGLE-STAGE INFERENCE
	else:
	scores = pd[:, 4].cpu().numpy() # person detection score
	bboxes = scale_coords(imgs[si].shape[1:], pd[:, :4], shape).round().cpu().numpy()
	# the pose coordinates are the trailing num_coords columns of each person detection
	poses = scale_coords(imgs[si].shape[1:], pd[:, -data['num_coords']:], shape).cpu().numpy()
	# NOTE(review): the middle dimension is a negative number other than -1; presumably
	# NumPy infers it as num_coords // 2 - TODO confirm on the NumPy version in use
	poses = poses.reshape((nd, -data['num_coords'], 2))
	# third column starts at 0 and is later compared with / overwritten by keypoint-object confidence
	poses = np.concatenate((poses, np.zeros((nd, poses.shape[1], 1))), axis=-1)

	if data['use_kp_dets'] and nkp:
	# only persons scoring above conf_thres_kp_person may have keypoints overwritten
	mask = scores > data['conf_thres_kp_person']
	# boolean indexing yields a copy, hence the write-back `poses[mask] = poses_mask` below
	poses_mask = poses[mask]

	if len(poses_mask):
	# note: rescales the boxes of the caller's kp detection tensor in place
	kpd[:, :4] = scale_coords(imgs[si].shape[1:], kpd[:, :4], shape)
	kpd = kpd[:, :6].cpu()

	for x1, y1, x2, y2, conf, cls in kpd:
	# keypoint location = centre of the keypoint-object box
	x, y = np.mean((x1, x2)), np.mean((y1, y2))
	# keypoint class c maps to pose keypoint index c - 1 (class 0 is the person class)
	pose_kps = poses_mask[:, int(cls - 1)]
	dist = np.linalg.norm(pose_kps[:, :2] - np.array([[x, y]]), axis=-1)
	# nearest eligible person for this keypoint type
	kp_match = np.argmin(dist)
	# overwrite only if more confident than what is stored and within overwrite_tol
	if conf > pose_kps[kp_match, 2] and dist[kp_match] < data['overwrite_tol']:
	pose_kps[kp_match] = [x, y, conf]
	n_fused[int(cls - 1)] += 1
	poses[mask] = poses_mask

	# shift poses by the per-image origin offset (zeros unless `origins` was supplied)
	poses = [p + origin for p in poses]

	# NOTE(review): `bboxes` is assigned only by the single-stage statements above;
	# confirm this line cannot be reached from the two-stage path without it.
	batch_bboxes.extend(bboxes)
	batch_poses.extend(poses)
	batch_scores.extend(scores)
	batch_ids.extend([img_id] * len(scores))

	return batch_bboxes, batch_poses, batch_scores, batch_ids, n_fused


	@torch.no_grad()
	def run(data,
	weights=None, # model.pt path(s)
	batch_size=16, # batch size
	imgsz=1280, # inference size (pixels)
	task='val', # train, val, test, speed or study
	device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
	conf_thres=0.001, # confidence threshold
	iou_thres=0.65, # NMS IoU threshold
	no_kp_dets=False,
	conf_thres_kp=0.2,
	iou_thres_kp=0.25,
	conf_thres_kp_person=0.3,
	overwrite_tol=50, # pixels for kp det overwrite
	scales=[1],
	flips=[None],
	rect=False,
	half=True, # use FP16 half-precision inference
	model=None,
	dataloader=None,
	compute_loss=None,
	two_stage=False,
	pad=0
	):
	"""Run inference over a dataset split, write the poses to a JSON file and evaluate them.

	The function has two modes, selected by `model`:
	* model is None ("called directly"): the device is selected, weights are
	loaded with attempt_load and a dataloader is created for `task`.
	* model is given ("called by train.py" per the inline comment): the model's
	own device is used and results are written to a temporary file.

	NOTE(review): the nesting indentation of this function was lost in this copy
	of the file, so which statements belong to which branch cannot be read from
	the layout and must be confirmed against the upstream source.

	NOTE(review): `scales=[1]` and `flips=[None]` are mutable default arguments.
	They are only stored in `data` here, not mutated, but the same list objects
	are shared between calls.

	Args:
	data: dataset description; after check_dataset it is used as a dict and
	extended with the inference settings below.
	weights: path of the weights file; also determines where the JSON is saved.
	batch_size: dataloader batch size; must be 1 when two_stage is set.
	imgsz: inference image size in pixels.
	task: dataset split; any value outside ('train', 'val', 'test') becomes 'val'
	when the dataloader is built here.
	device: device string passed to select_device.
	conf_thres, iou_thres: person NMS thresholds.
	no_kp_dets: disable fusion of keypoint objects into poses.
	conf_thres_kp, iou_thres_kp: keypoint-object NMS thresholds.
	conf_thres_kp_person: minimum person score for keypoint overwriting.
	overwrite_tol: maximum pixel distance for keypoint overwriting.
	scales, flips: forwarded to the model call.
	rect: forwarded to create_dataloader.
	half: use FP16; turned off automatically when the device is the CPU.
	model, dataloader, compute_loss: supplied by the caller in training mode.
	two_stage, pad: experimental two-stage inference and its crop padding.

	Returns:
	A 3-tuple ((mp, mr, map50, mAP, *loss averaged over batches), np.zeros(nc), t)
	where t holds the pre-process / inference / NMS times per image in ms.
	mp and mr are never updated in this function and are returned as 0.
	"""

	if two_stage:
	# NOTE(review): assert statements are stripped under `python -O`
	assert batch_size == 1, 'Batch size must be set to 1 for two-stage processing'
	assert conf_thres >= 0.01, 'Confidence threshold must be >= 0.01 for two-stage processing'
	assert not rect, 'Cannot use rectangular inference with two-stage processing'
	assert not half, 'Two-stage processing must use full precision'

	use_kp_dets = not no_kp_dets

	# Initialize/load model and set device
	training = model is not None
	if training: # called by train.py
	device = next(model.parameters()).device # get model device
	else: # called directly
	device = select_device(device, batch_size=batch_size)

	# Load model
	model = attempt_load(weights, map_location=device) # load FP32 model
	gs = max(int(model.stride.max()), 32) # grid size (max stride)
	imgsz = check_img_size(imgsz, s=gs) # check image size

	# Data
	data = check_dataset(data) # check

	# add inference settings to data dict
	# (run_nms and post_process_batch read all their settings from this dict)
	data['imgsz'] = imgsz
	data['conf_thres'] = conf_thres
	data['iou_thres'] = iou_thres
	data['use_kp_dets'] = use_kp_dets
	data['conf_thres_kp'] = conf_thres_kp
	data['iou_thres_kp'] = iou_thres_kp
	data['conf_thres_kp_person'] = conf_thres_kp_person
	data['overwrite_tol'] = overwrite_tol
	data['scales'] = scales
	data['flips'] = flips

	# pick the evaluation toolkit from the dataset path: pycocotools if it contains 'coco', else crowdposetools
	is_coco = 'coco' in data['path']
	if is_coco:
	from pycocotools.coco import COCO
	from pycocotools.cocoeval import COCOeval
	else:
	from crowdposetools.coco import COCO
	from crowdposetools.cocoeval import COCOeval

	# Half
	half &= device.type != 'cpu' # half precision only supported on CUDA
	if half:
	model.half()

	model.eval()
	nc = int(data['nc']) # number of classes

	# Dataloader
	if not training:
	if device.type != 'cpu':
	model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
	task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images
	dataloader = create_dataloader(data[task], data['labels'], imgsz, batch_size, gs, rect=rect,
	prefix=colorstr(f'{task}: '), kp_flip=data['kp_flip'])[0]

	seen = 0
	# mp and mr stay 0 for the whole function; t0/t1/t2 accumulate pre-process/inference/NMS time
	mp, mr, map50, mAP, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0.
	loss = torch.zeros(4, device=device)
	# one dict per detected person, in the result format loaded by COCO.loadRes below
	json_dump = []
	n_fused = np.zeros(data['num_coords'] // 2)

	for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc='Processing {} images'.format(task))):
	t_ = time_sync()
	imgs = imgs.to(device, non_blocking=True)
	imgs = imgs.half() if half else imgs.float() # uint8 to fp16/32
	imgs /= 255.0 # 0 - 255 to 0.0 - 1.0
	targets = targets.to(device)
	nb, _, height, width = imgs.shape # batch size, channels, height, width
	t = time_sync()
	t0 += t - t_

	# Run model
	out, train_out = model(imgs, augment=True, kp_flip=data['kp_flip'], scales=data['scales'], flips=data['flips'])
	t1 += time_sync() - t

	# Compute loss
	if train_out: # only computed if no scale / flipping
	if compute_loss:
	loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls, kp

	t = time_sync()

	# NMS
	person_dets, kp_dets = run_nms(data, out)

	# Fuse keypoint and pose detections
	_, poses, scores, img_ids, n_fused_batch = post_process_batch(
	data, imgs, paths, shapes, person_dets, kp_dets, two_stage, pad, device, model)

	t2 += time_sync() - t
	seen += len(imgs)
	n_fused += n_fused_batch

	for i, (pose, score, img_id) in enumerate(zip(poses, scores, img_ids)):
	json_dump.append({
	'image_id': img_id,
	'category_id': 1,
	'keypoints': pose.reshape(-1).tolist(),
	'score': float(score) # person score
	})

	if not training: # save json
	# results go next to the weights file; the settings are encoded in the file name
	save_dir, weights_name = osp.split(weights)
	json_name = '{}_{}_c{}_i{}_ck{}_ik{}_ckp{}_t{}.json'.format(
	task, osp.splitext(weights_name)[0],
	conf_thres, iou_thres, conf_thres_kp, iou_thres_kp,
	conf_thres_kp_person, overwrite_tol
	)
	json_path = osp.join(save_dir, json_name)
	else:
	# NOTE(review): the temporary file is kept open here and reopened by name below;
	# reopening an open NamedTemporaryFile by name is platform dependent, and the
	# handle is not closed if evaluation raises before tmp.close().
	tmp = tempfile.NamedTemporaryFile(mode='w+b')
	json_path = tmp.name

	with open(json_path, 'w') as f:
	json.dump(json_dump, f)

	if task in ('train', 'val'):
	# annotation file path comes from the '<task>_annotations' entry of the dataset dict
	annot = osp.join(data['path'], data['{}_annotations'.format(task)])
	coco = COCO(annot)
	result = coco.loadRes(json_path)
	# NOTE(review): the local name `eval` shadows the builtin of the same name
	eval = COCOeval(coco, result, iouType='keypoints')
	eval.evaluate()
	eval.accumulate()
	eval.summarize()
	# the first two summary stats are taken as mAP and map50
	mAP, map50 = eval.stats[:2]

	if training:
	tmp.close()

	# Print speeds
	t = tuple(x / seen * 1E3 for x in (t0, t1, t2)) # speeds per image
	if not training and task != 'test':
	# append the achieved AP to the result file name
	os.rename(json_path, osp.splitext(json_path)[0] + '_ap{:.4f}.json'.format(mAP))
	shape = (batch_size, 3, imgsz, imgsz)
	print(f'Speed: %.3fms pre-process, %.3fms inference, %.3fms NMS per image at shape {shape}' % t)
	print('Keypoint Objects Fused:', n_fused)
	model.float() # for training
	# np.zeros(nc) is a placeholder second element; see the trailing comment
	return (mp, mr, map50, mAP, *(loss.cpu() / len(dataloader)).tolist()), np.zeros(nc), t # for compatibility with train


	def parse_opt():
	    """Parse the val.py command line and normalise the resulting options.

	    After parsing, two fields are rewritten:
	      * every -1 in ``flips`` becomes ``None``;
	      * ``data`` is replaced by the return value of ``check_file``.

	    Returns:
	        argparse.Namespace whose attribute names match the keyword arguments
	        of ``run`` (``main`` forwards it as ``run(**vars(opt))``).
	    """
	    cli = argparse.ArgumentParser(prog='val.py')
	    add = cli.add_argument

	    # dataset, model and runtime
	    add('--data', type=str, default='data/coco-kp.yaml', help='dataset.yaml path')
	    add('--weights', default='kapao_s_coco.pt')
	    add('--batch-size', type=int, default=1, help='batch size')
	    add('--imgsz', type=int, default=1280, help='inference size (pixels)')
	    add('--task', default='val', help='train, val, test')
	    add('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

	    # person NMS
	    add('--conf-thres', type=float, default=0.001, help='confidence threshold')
	    add('--iou-thres', type=float, default=0.65, help='NMS IoU threshold')

	    # keypoint-object NMS and fusion
	    add('--no-kp-dets', action='store_true', help='do not use keypoint objects')
	    add('--conf-thres-kp', type=float, default=0.2)
	    add('--conf-thres-kp-person', type=float, default=0.3)
	    add('--iou-thres-kp', type=float, default=0.25)
	    add('--overwrite-tol', type=int, default=50)

	    # test-time augmentation
	    add('--scales', type=float, nargs='+', default=[1])
	    add('--flips', type=int, nargs='+', default=[-1])

	    # inference mode
	    add('--rect', action='store_true', help='rectangular input image')
	    add('--half', action='store_true', help='use FP16 half-precision inference')
	    add('--two-stage', action='store_true', help='use two-stage inference (experimental)')
	    add('--pad', type=int, default=0, help='padding for two-stage inference')

	    opt = cli.parse_args()
	    # -1 is the command-line spelling of "no flip"
	    opt.flips = [f if f != -1 else None for f in opt.flips]
	    opt.data = check_file(opt.data)  # check file
	    return opt


	def main(opt):
	    """Print the parsed options and start validation for a supported task.

	    Args:
	        opt: namespace produced by ``parse_opt``. Its attributes are forwarded
	            to ``run`` as keyword arguments when ``opt.task`` is 'train',
	            'val' or 'test'; for any other task nothing is run.
	    """
	    set_logging()
	    banner = colorstr('val: ')
	    settings = vars(opt)
	    print(banner + ', '.join(f'{k}={v}' for k, v in settings.items()))

	    if opt.task not in ('train', 'val', 'test'):
	        return
	    run(**settings)  # run normally


	if __name__ == '__main__':
	    # Script entry point: parse the command line and hand the options to main().
	    main(parse_opt())