# Copyright (c) Facebook, Inc. and its affiliates.
import cv2
import numpy as np
import torch
import torch.nn.functional as F
import os

# Fixed random palette: one bright-ish BGR color per category index (up to 1300).
COLORS = ((np.random.rand(1300, 3) * 0.4 + 0.6) * 255).astype(
    np.uint8).reshape(1300, 1, 1, 3)


def _get_color_image(heatmap):
    # heatmap: C x H x W in [0, 1]; returns an H x W x 3 uint8 color map.
    heatmap = heatmap.reshape(
        heatmap.shape[0], heatmap.shape[1], heatmap.shape[2], 1)
    if heatmap.shape[0] == 1:
        color_map = (heatmap * np.ones((1, 1, 1, 3), np.uint8) * 255).max(
            axis=0).astype(np.uint8)  # H, W, 3
    else:
        color_map = (heatmap * COLORS[:heatmap.shape[0]]).max(
            axis=0).astype(np.uint8)  # H, W, 3
    return color_map


def _blend_image(image, color_map, a=0.7):
    color_map = cv2.resize(color_map, (image.shape[1], image.shape[0]))
    ret = np.clip(image * (1 - a) + color_map * a, 0, 255).astype(np.uint8)
    return ret


def _blend_image_heatmaps(image, color_maps, a=0.7):
    merges = np.zeros((image.shape[0], image.shape[1], 3), np.float32)
    for color_map in color_maps:
        color_map = cv2.resize(color_map, (image.shape[1], image.shape[0]))
        merges = np.maximum(merges, color_map)
    ret = np.clip(image * (1 - a) + merges * a, 0, 255).astype(np.uint8)
    return ret


def _decompose_level(x, shapes_per_level, N):
    '''
    x: LNHiWi x C
    '''
    # Split the flattened per-location tensor back into per-level,
    # per-image C x H_i x W_i maps.
    x = x.view(x.shape[0], -1)
    ret = []
    st = 0
    for l in range(len(shapes_per_level)):
        ret.append([])
        h = shapes_per_level[l][0].int().item()
        w = shapes_per_level[l][1].int().item()
        for i in range(N):
            ret[l].append(x[st + h * w * i:st + h * w * (i + 1)].view(
                h, w, -1).permute(2, 0, 1))
        st += h * w * N
    return ret


def _imagelist_to_tensor(images):
    # Pad every image to a common size (multiple of 32) and stack to N x 3 x H x W.
    images = [x for x in images]
    image_sizes = [x.shape[-2:] for x in images]
    h = max([size[0] for size in image_sizes])
    w = max([size[1] for size in image_sizes])
    S = 32
    h, w = ((h - 1) // S + 1) * S, ((w - 1) // S + 1) * S
    images = [F.pad(x, (0, w - x.shape[2], 0, h - x.shape[1], 0, 0)) \
        for x in images]
    images = torch.stack(images)
    return images


def _ind2il(ind, shapes_per_level, N):
    # Map a flattened location index back to (image index, level index).
    r = ind
    l = 0
    S = 0
    while r - S >= N * shapes_per_level[l][0] * shapes_per_level[l][1]:
        S += N * shapes_per_level[l][0] * shapes_per_level[l][1]
        l += 1
    i = (r - S) // (shapes_per_level[l][0] * shapes_per_level[l][1])
    return i, l
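
# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original pipeline): the coloring and
# blending helpers above can be exercised on their own to sanity-check the
# heatmap rendering. A per-level heatmap is C x H x W with values in [0, 1].
#
#   hm = np.random.rand(3, 32, 32).astype(np.float32)   # C x H x W
#   color_map = _get_color_image(hm)                     # H x W x 3, uint8
#   canvas = np.zeros((128, 128, 3), np.float32)         # dummy image
#   blend = _blend_image_heatmaps(canvas, [color_map])   # H x W x 3, uint8
# ---------------------------------------------------------------------------
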

def debug_train(
    images, gt_instances, flattened_hms, reg_targets, labels, pos_inds,
    shapes_per_level, locations, strides):
    '''
    images: N x 3 x H x W
    flattened_hms: LNHiWi x C
    shapes_per_level: L x 2 [(H_i, W_i)]
    locations: LNHiWi x 2
    '''
    reg_inds = torch.nonzero(
        reg_targets.max(dim=1)[0] > 0).squeeze(1)
    N = len(images)
    images = _imagelist_to_tensor(images)
    repeated_locations = [torch.cat([loc] * N, dim=0) \
        for loc in locations]
    locations = torch.cat(repeated_locations, dim=0)
    gt_hms = _decompose_level(flattened_hms, shapes_per_level, N)
    masks = flattened_hms.new_zeros((flattened_hms.shape[0], 1))
    masks[pos_inds] = 1
    masks = _decompose_level(masks, shapes_per_level, N)
    for i in range(len(images)):
        image = images[i].detach().cpu().numpy().transpose(1, 2, 0)
        color_maps = []
        for l in range(len(gt_hms)):
            color_map = _get_color_image(
                gt_hms[l][i].detach().cpu().numpy())
            color_maps.append(color_map)
            cv2.imshow('gthm_{}'.format(l), color_map)
        blend = _blend_image_heatmaps(image.copy(), color_maps)
        if gt_instances is not None:
            # Ground-truth boxes in red.
            bboxes = gt_instances[i].gt_boxes.tensor
            for j in range(len(bboxes)):
                bbox = bboxes[j]
                cv2.rectangle(
                    blend,
                    (int(bbox[0]), int(bbox[1])),
                    (int(bbox[2]), int(bbox[3])),
                    (0, 0, 255), 3, cv2.LINE_AA)
        for j in range(len(pos_inds)):
            # Positive classification locations as yellow markers.
            image_id, l = _ind2il(pos_inds[j], shapes_per_level, N)
            if image_id != i:
                continue
            loc = locations[pos_inds[j]]
            cv2.drawMarker(
                blend, (int(loc[0]), int(loc[1])), (0, 255, 255),
                markerSize=(l + 1) * 16)
        for j in range(len(reg_inds)):
            # Regression targets decoded back to boxes, in blue.
            image_id, l = _ind2il(reg_inds[j], shapes_per_level, N)
            if image_id != i:
                continue
            ltrb = reg_targets[reg_inds[j]]
            ltrb *= strides[l]
            loc = locations[reg_inds[j]]
            bbox = [(loc[0] - ltrb[0]), (loc[1] - ltrb[1]),
                    (loc[0] + ltrb[2]), (loc[1] + ltrb[3])]
            cv2.rectangle(
                blend,
                (int(bbox[0]), int(bbox[1])),
                (int(bbox[2]), int(bbox[3])),
                (255, 0, 0), 1, cv2.LINE_AA)
            cv2.circle(blend, (int(loc[0]), int(loc[1])), 2, (255, 0, 0), -1)
        cv2.imshow('blend', blend)
        cv2.waitKey()


def debug_test(
    images, logits_pred, reg_pred, agn_hm_pred=[], preds=[],
    vis_thresh=0.3, debug_show_name=False, mult_agn=False):
    '''
    images: N x 3 x H x W
    class_target: LNHiWi x C
    cat_agn_heatmap: LNHiWi
    shapes_per_level: L x 2 [(H_i, W_i)]
    '''
    N = len(images)
    for i in range(len(images)):
        image = images[i].detach().cpu().numpy().transpose(1, 2, 0)
        result = image.copy().astype(np.uint8)
        pred_image = image.copy().astype(np.uint8)
        color_maps = []
        L = len(logits_pred)
        for l in range(L):
            if logits_pred[0] is not None:
                stride = min(image.shape[0], image.shape[1]) / min(
                    logits_pred[l][i].shape[1], logits_pred[l][i].shape[2])
            else:
                stride = min(image.shape[0], image.shape[1]) / min(
                    agn_hm_pred[l][i].shape[1], agn_hm_pred[l][i].shape[2])
            stride = stride if stride < 60 else 64 if stride < 100 else 128
            if logits_pred[0] is not None:
                if mult_agn:
                    logits_pred[l][i] = logits_pred[l][i] * agn_hm_pred[l][i]
                color_map = _get_color_image(
                    logits_pred[l][i].detach().cpu().numpy())
                color_maps.append(color_map)
                cv2.imshow('predhm_{}'.format(l), color_map)
            if debug_show_name:
                from detectron2.data.datasets.lvis_v1_categories import LVIS_CATEGORIES
                cat2name = [x['name'] for x in LVIS_CATEGORIES]
            for j in range(len(preds[i].scores) if preds is not None else 0):
                if preds[i].scores[j] > vis_thresh:
                    bbox = preds[i].proposal_boxes[j] \
                        if preds[i].has('proposal_boxes') else \
                        preds[i].pred_boxes[j]
                    bbox = bbox.tensor[0].detach().cpu().numpy().astype(np.int32)
                    cat = int(preds[i].pred_classes[j]) \
                        if preds[i].has('pred_classes') else 0
                    cl = COLORS[cat, 0, 0]
                    cv2.rectangle(
                        pred_image, (int(bbox[0]), int(bbox[1])),
                        (int(bbox[2]), int(bbox[3])),
                        (int(cl[0]), int(cl[1]), int(cl[2])), 2, cv2.LINE_AA)
                    if debug_show_name:
                        txt = '{}{:.1f}'.format(
                            cat2name[cat] if cat > 0 else '',
                            preds[i].scores[j])
                        font = cv2.FONT_HERSHEY_SIMPLEX
                        cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                        cv2.rectangle(
                            pred_image,
                            (int(bbox[0]), int(bbox[1] - cat_size[1] - 2)),
                            (int(bbox[0] + cat_size[0]), int(bbox[1] - 2)),
                            (int(cl[0]), int(cl[1]), int(cl[2])), -1)
                        cv2.putText(
                            pred_image, txt, (int(bbox[0]), int(bbox[1] - 2)),
                            font, 0.5, (0, 0, 0), thickness=1,
                            lineType=cv2.LINE_AA)
            if agn_hm_pred[l] is not None:
                # Class-agnostic heatmap shown as a grayscale image.
                agn_hm_ = agn_hm_pred[l][i, 0, :, :, None].detach().cpu().numpy()
                agn_hm_ = (agn_hm_ * np.array([255, 255, 255]).reshape(
                    1, 1, 3)).astype(np.uint8)
                cv2.imshow('agn_hm_{}'.format(l), agn_hm_)
        blend = _blend_image_heatmaps(image.copy(), color_maps)
        cv2.imshow('blend', blend)
        cv2.imshow('preds', pred_image)
        cv2.waitKey()
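
# ---------------------------------------------------------------------------
# Hedged usage sketch (an assumption, not part of the original code): the
# predictions passed to debug_test are per-FPN-level lists of batched tensors,
# e.g. for a single 128 x 128 image, C = 2 classes, and one level:
#
#   imgs   = [torch.rand(3, 128, 128) * 255]
#   logits = [torch.rand(1, 2, 16, 16)]    # N x C x H_l x W_l
#   agn_hm = [torch.rand(1, 1, 16, 16)]    # N x 1 x H_l x W_l
#   debug_test(imgs, logits, reg_pred=None, agn_hm_pred=agn_hm, preds=None)
#
# Showing the windows requires an OpenCV build with GUI support.
# ---------------------------------------------------------------------------
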

# Global counter used to name the saved debug images.
global cnt
cnt = 0


def debug_second_stage(images, instances, proposals=None, vis_thresh=0.3,
    save_debug=False, debug_show_name=False, image_labels=[],
    save_debug_path='output/save_debug/', bgr=False):
    images = _imagelist_to_tensor(images)
    if 'COCO' in save_debug_path:
        from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
        cat2name = [x['name'] for x in COCO_CATEGORIES]
    else:
        from detectron2.data.datasets.lvis_v1_categories import LVIS_CATEGORIES
        cat2name = ['({}){}'.format(x['frequency'], x['name']) \
            for x in LVIS_CATEGORIES]
    for i in range(len(images)):
        image = images[i].detach().cpu().numpy().transpose(
            1, 2, 0).astype(np.uint8).copy()
        if bgr:
            image = image[:, :, ::-1].copy()
        if instances[i].has('gt_boxes'):
            bboxes = instances[i].gt_boxes.tensor.cpu().numpy()
            scores = np.ones(bboxes.shape[0])
            cats = instances[i].gt_classes.cpu().numpy()
        else:
            bboxes = instances[i].pred_boxes.tensor.cpu().numpy()
            scores = instances[i].scores.cpu().numpy()
            cats = instances[i].pred_classes.cpu().numpy()
        for j in range(len(bboxes)):
            if scores[j] > vis_thresh:
                bbox = bboxes[j]
                cl = COLORS[cats[j], 0, 0]
                cl = (int(cl[0]), int(cl[1]), int(cl[2]))
                cv2.rectangle(
                    image,
                    (int(bbox[0]), int(bbox[1])),
                    (int(bbox[2]), int(bbox[3])),
                    cl, 2, cv2.LINE_AA)
                if debug_show_name:
                    cat = cats[j]
                    txt = '{}{:.1f}'.format(
                        cat2name[cat] if cat > 0 else '',
                        scores[j])
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                    cv2.rectangle(
                        image,
                        (int(bbox[0]), int(bbox[1] - cat_size[1] - 2)),
                        (int(bbox[0] + cat_size[0]), int(bbox[1] - 2)),
                        (int(cl[0]), int(cl[1]), int(cl[2])), -1)
                    cv2.putText(
                        image, txt, (int(bbox[0]), int(bbox[1] - 2)),
                        font, 0.5, (0, 0, 0), thickness=1,
                        lineType=cv2.LINE_AA)
        if proposals is not None:
            proposal_image = images[i].detach().cpu().numpy().transpose(
                1, 2, 0).astype(np.uint8).copy()
            if bgr:
                proposal_image = proposal_image.copy()
            else:
                proposal_image = proposal_image[:, :, ::-1].copy()
            bboxes = proposals[i].proposal_boxes.tensor.cpu().numpy()
            if proposals[i].has('scores'):
                scores = proposals[i].scores.detach().cpu().numpy()
            else:
                scores = proposals[i].objectness_logits.detach().cpu().numpy()
            # selected = -1
            # if proposals[i].has('image_loss'):
            #     selected = proposals[i].image_loss.argmin()
            if proposals[i].has('selected'):
                selected = proposals[i].selected
            else:
                selected = [-1 for _ in range(len(bboxes))]
            for j in range(len(bboxes)):
                if scores[j] > vis_thresh or selected[j] >= 0:
                    bbox = bboxes[j]
                    cl = (209, 159, 83)
                    th = 2
                    if selected[j] >= 0:
                        # Highlight selected proposals with a thicker dark-red box.
                        cl = (0, 0, 0xa4)
                        th = 4
                    cv2.rectangle(
                        proposal_image,
                        (int(bbox[0]), int(bbox[1])),
                        (int(bbox[2]), int(bbox[3])),
                        cl, th, cv2.LINE_AA)
                    if selected[j] >= 0 and debug_show_name:
                        cat = selected[j].item()
                        txt = '{}'.format(cat2name[cat])
                        font = cv2.FONT_HERSHEY_SIMPLEX
                        cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
                        cv2.rectangle(
                            proposal_image,
                            (int(bbox[0]), int(bbox[1] - cat_size[1] - 2)),
                            (int(bbox[0] + cat_size[0]), int(bbox[1] - 2)),
                            (int(cl[0]), int(cl[1]), int(cl[2])), -1)
                        cv2.putText(
                            proposal_image, txt,
                            (int(bbox[0]), int(bbox[1] - 2)),
                            font, 0.5, (0, 0, 0), thickness=1,
                            lineType=cv2.LINE_AA)
        if save_debug:
            global cnt
            cnt = (cnt + 1) % 5000
            if not os.path.exists(save_debug_path):
                os.mkdir(save_debug_path)
            save_name = '{}/{:05d}.jpg'.format(save_debug_path, cnt)
            if i < len(image_labels):
                image_label = image_labels[i]
                save_name = '{}/{:05d}'.format(save_debug_path, cnt)
                for x in image_label:
                    class_name = cat2name[x]
                    save_name = save_name + '|{}'.format(class_name)
                save_name = save_name + '.jpg'
            # Fall back to the annotated image when no proposals were passed.
            cv2.imwrite(
                save_name,
                proposal_image if proposals is not None else image)
        else:
            cv2.imshow('image', image)
            if proposals is not None:
                cv2.imshow('proposals', proposal_image)
            cv2.waitKey()
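

if __name__ == '__main__':
    # Minimal, hedged smoke-test sketch (an assumption, not part of the original
    # training / inference pipeline): build one synthetic image plus fake
    # ground-truth and proposal Instances, then run debug_second_stage in save
    # mode so that no display window is required. The 'COCO' substring in the
    # (hypothetical) output path selects the COCO category names above.
    from detectron2.structures import Instances, Boxes

    h, w = 64, 64
    image = torch.randint(0, 255, (3, h, w), dtype=torch.uint8).float()

    gt = Instances((h, w))
    gt.gt_boxes = Boxes(torch.tensor([[8., 8., 40., 40.]]))
    gt.gt_classes = torch.tensor([0])

    props = Instances((h, w))
    props.proposal_boxes = Boxes(torch.tensor([[10., 10., 36., 36.]]))
    props.objectness_logits = torch.tensor([1.0])

    save_dir = 'output/save_debug_COCO/'
    os.makedirs(save_dir, exist_ok=True)
    debug_second_stage(
        [image], [gt], proposals=[props],
        save_debug=True, save_debug_path=save_dir)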