# CODE WAS MODIFIED FROM https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
import torch
import cv2
import torchvision.transforms as transforms
import numpy as np
import math
import torchvision
import gradio as gr
from PIL import Image
import requests
COCO_KEYPOINT_INDEXES = {
    0: 'nose',
    1: 'left_eye',
    2: 'right_eye',
    3: 'left_ear',
    4: 'right_ear',
    5: 'left_shoulder',
    6: 'right_shoulder',
    7: 'left_elbow',
    8: 'right_elbow',
    9: 'left_wrist',
    10: 'right_wrist',
    11: 'left_hip',
    12: 'right_hip',
    13: 'left_knee',
    14: 'right_knee',
    15: 'left_ankle',
    16: 'right_ankle'
}
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
def get_max_preds(batch_heatmaps):
    '''
    get predictions from score maps
    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
    '''
    assert isinstance(batch_heatmaps, np.ndarray), \
        'batch_heatmaps should be numpy.ndarray'
    assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'
    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)
    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))
    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
    preds[:, :, 0] = (preds[:, :, 0]) % width
    preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)
    preds *= pred_mask
    return preds, maxvals
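
# Illustrative example (not part of the app, hypothetical numbers): for a single
# 2x3 heatmap np.array([[[[0.1, 0.2, 0.9],
#                         [0.0, 0.3, 0.1]]]]) the flattened argmax index is 2, so
# get_max_preds returns preds = [[[2., 0.]]] (x = idx % width, y = idx // width)
# and maxvals = [[[0.9]]].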
def get_dir(src_point, rot_rad):
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs
    return src_result


def get_3rd_point(a, b):
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)
def get_affine_transform(
        center, scale, rot, output_size,
        shift=np.array([0, 0], dtype=np.float32), inv=0
):
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        print(scale)
        scale = np.array([scale, scale])

    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans
def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]


def transform_preds(coords, center, scale, output_size):
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords
def taylor(hm, coord):
    heatmap_height = hm.shape[0]
    heatmap_width = hm.shape[1]
    px = int(coord[0])
    py = int(coord[1])
    if 1 < px < heatmap_width - 2 and 1 < py < heatmap_height - 2:
        dx = 0.5 * (hm[py][px + 1] - hm[py][px - 1])
        dy = 0.5 * (hm[py + 1][px] - hm[py - 1][px])
        dxx = 0.25 * (hm[py][px + 2] - 2 * hm[py][px] + hm[py][px - 2])
        dxy = 0.25 * (hm[py + 1][px + 1] - hm[py - 1][px + 1]
                      - hm[py + 1][px - 1] + hm[py - 1][px - 1])
        dyy = 0.25 * (hm[py + 2][px] - 2 * hm[py][px] + hm[py - 2][px])
        derivative = np.matrix([[dx], [dy]])
        hessian = np.matrix([[dxx, dxy], [dxy, dyy]])
        if dxx * dyy - dxy ** 2 != 0:
            hessianinv = hessian.I
            offset = -hessianinv * derivative
            offset = np.squeeze(np.array(offset.T), axis=0)
            coord += offset
    return coord
def gaussian_blur(hm, kernel):
    border = (kernel - 1) // 2
    batch_size = hm.shape[0]
    num_joints = hm.shape[1]
    height = hm.shape[2]
    width = hm.shape[3]
    for i in range(batch_size):
        for j in range(num_joints):
            origin_max = np.max(hm[i, j])
            dr = np.zeros((height + 2 * border, width + 2 * border))
            dr[border: -border, border: -border] = hm[i, j].copy()
            dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
            hm[i, j] = dr[border: -border, border: -border].copy()
            hm[i, j] *= origin_max / np.max(hm[i, j])
    return hm
def get_final_preds(hm, center, scale, transform_back=True, test_blur_kernel=3):
    coords, maxvals = get_max_preds(hm)
    heatmap_height = hm.shape[2]
    heatmap_width = hm.shape[3]

    # post-processing: smooth the heatmaps, then refine each argmax location with a
    # second-order Taylor expansion of the log-heatmap for sub-pixel accuracy
    hm = gaussian_blur(hm, test_blur_kernel)
    hm = np.maximum(hm, 1e-10)
    hm = np.log(hm)
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            coords[n, p] = taylor(hm[n][p], coords[n][p])

    preds = coords.copy()
    if transform_back:
        # Transform back to the original image coordinate system
        for i in range(coords.shape[0]):
            preds[i] = transform_preds(
                coords[i], center[i], scale[i], [heatmap_width, heatmap_height]
            )

    return preds, maxvals
SKELETON = [
    [1, 3], [1, 0], [2, 4], [2, 0], [0, 5], [0, 6], [5, 7], [7, 9], [6, 8], [8, 10], [5, 11], [6, 12], [11, 12],
    [11, 13], [13, 15], [12, 14], [14, 16]
]
CocoColors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
NUM_KPTS = 17
def get_person_detection_boxes(model, img, threshold=0.5):
    pred = model(img)
    pred_classes = [COCO_INSTANCE_CATEGORY_NAMES[i]
                    for i in list(pred[0]['labels'].cpu().numpy())]  # predicted class names
    pred_boxes = [[(i[0], i[1]), (i[2], i[3])]
                  for i in list(pred[0]['boxes'].detach().cpu().numpy())]  # bounding boxes
    pred_score = list(pred[0]['scores'].detach().cpu().numpy())
    if not pred_score or max(pred_score) < threshold:
        return []
    # Keep every detection up to the last index whose score exceeds the threshold
    pred_t = [pred_score.index(x) for x in pred_score if x > threshold][-1]
    pred_boxes = pred_boxes[:pred_t + 1]
    pred_classes = pred_classes[:pred_t + 1]

    person_boxes = []
    for idx, box in enumerate(pred_boxes):
        if pred_classes[idx] == 'person':
            person_boxes.append(box)

    return person_boxes
def draw_pose(keypoints, img):
    """Draw the keypoints and the skeleton.
    :param keypoints: array of shape [17, 2]
    :param img: image to draw on (modified in place)
    """
    assert keypoints.shape == (NUM_KPTS, 2)
    for i in range(len(SKELETON)):
        kpt_a, kpt_b = SKELETON[i][0], SKELETON[i][1]
        x_a, y_a = keypoints[kpt_a][0], keypoints[kpt_a][1]
        x_b, y_b = keypoints[kpt_b][0], keypoints[kpt_b][1]
        cv2.circle(img, (int(x_a), int(y_a)), 6, CocoColors[i], -1)
        cv2.circle(img, (int(x_b), int(y_b)), 6, CocoColors[i], -1)
        cv2.line(img, (int(x_a), int(y_a)), (int(x_b), int(y_b)), CocoColors[i], 2)
def box_to_center_scale(box, model_image_width, model_image_height):
    """convert a box to center,scale information required for pose transformation
    Parameters
    ----------
    box : list of tuple
        list of length 2 with two tuples of floats representing
        bottom left and top right corner of a box
    model_image_width : int
    model_image_height : int

    Returns
    -------
    (numpy array, numpy array)
        Two numpy arrays, coordinates for the center of the box and the scale of the box
    """
    center = np.zeros((2), dtype=np.float32)

    bottom_left_corner = box[0]
    top_right_corner = box[1]
    box_width = top_right_corner[0] - bottom_left_corner[0]
    box_height = top_right_corner[1] - bottom_left_corner[1]
    bottom_left_x = bottom_left_corner[0]
    bottom_left_y = bottom_left_corner[1]
    center[0] = bottom_left_x + box_width * 0.5
    center[1] = bottom_left_y + box_height * 0.5

    aspect_ratio = model_image_width * 1.0 / model_image_height
    pixel_std = 200

    if box_width > aspect_ratio * box_height:
        box_height = box_width * 1.0 / aspect_ratio
    elif box_width < aspect_ratio * box_height:
        box_width = box_height * aspect_ratio
    scale = np.array(
        [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std],
        dtype=np.float32)
    if center[0] != -1:
        scale = scale * 1.25

    return center, scale
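
# Illustrative example (hypothetical numbers, not from the source): for
# box = [(100, 100), (300, 500)] and a 256x192 model input, box_width = 200,
# box_height = 400 and aspect_ratio = 256 / 192 ≈ 1.333. Since 200 < 1.333 * 400,
# box_width is widened to 400 * 1.333 ≈ 533.3, giving center = [200., 300.] and
# scale ≈ [533.3 / 200, 400 / 200] * 1.25 ≈ [3.33, 2.5].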
def get_pose_estimation_prediction(pose_model, image, center, scale):
    rotation = 0
    img_size = (256, 192)

    # pose estimation transformation
    trans = get_affine_transform(center, scale, rotation, img_size)
    model_input = cv2.warpAffine(
        image,
        trans,
        (int(img_size[0]), int(img_size[1])),
        flags=cv2.INTER_LINEAR)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # pose estimation inference
    model_input = transform(model_input).unsqueeze(0)
    # switch to evaluate mode
    pose_model.eval()
    with torch.no_grad():
        # compute output heatmap
        output = pose_model(model_input)
        preds, _ = get_final_preds(
            output.clone().cpu().numpy(),
            np.asarray([center]),
            np.asarray([scale]))
        return preds
def main(image_bgr, backbone_choice, box_model=torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)):
    CTX = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    box_model.to(CTX)
    box_model.eval()

    # Map the radio-button choice to the corresponding TransPose torch.hub model
    if backbone_choice == "ResNet":
        backbone_choice = "tpr_a4_256x192"
    else:  # "HRNet" (default)
        backbone_choice = "tph_a4_256x192"
    model = torch.hub.load('yangsenius/TransPose:main', backbone_choice, pretrained=True)

    img_dimensions = (256, 192)
    image_rgb = image_bgr[:, :, [2, 1, 0]]
    img = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    img_tensor = torch.from_numpy(img / 255.).permute(2, 0, 1).float().to(CTX)
    detector_input = [img_tensor]

    pred_boxes = get_person_detection_boxes(box_model, detector_input, threshold=0.9)
    if len(pred_boxes) >= 1:
        for box in pred_boxes:
            center, scale = box_to_center_scale(box, img_dimensions[0], img_dimensions[1])
            image_pose = image_rgb.copy()
            pose_preds = get_pose_estimation_prediction(model, image_pose, center, scale)
            if len(pose_preds) >= 1:
                for kpt in pose_preds:
                    draw_pose(kpt, image_bgr)  # draw the poses on the input image

    return image_bgr
title = "TransPose"
description = "Gradio demo for TransPose: Keypoint Localization via Transformer. Dataset: COCO train2017 & COCO val2017. Default backbone selection = HRNet. [Integrated on paperswithcode.com](https://paperswithcode.com/paper/transpose-towards-explainable-human-pose)"
article = "<div style='text-align: center;'><a href='https://github.com/yangsenius/TransPose' target='_blank'>Full credits: github.com/yangsenius/TransPose</a></div>"
examples = [["./examples/one.jpg", "HRNet"], ["./examples/two.jpg", "HRNet"]]

iface = gr.Interface(main, inputs=[gr.inputs.Image(), gr.inputs.Radio(["HRNet", "ResNet"])], outputs="image", description=description, article=article, title=title, examples=examples)
iface.launch(enable_queue=True, debug=True)
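
# A minimal offline usage sketch (an assumption, not part of the demo: running the
# pipeline without Gradio; the file names below are hypothetical):
#
#   image_bgr = cv2.imread("person.jpg")    # image array, matching main()'s parameter name
#   result = main(image_bgr, "HRNet")        # returns the image with detected poses drawn
#   cv2.imwrite("person_pose.jpg", result)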