ChatGPT-ImageCaptioner

Paused

App Files Files Community

ChatGPT-ImageCaptioner / detic /modeling /debug.py

taesiri

Duplicate from taesiri/DeticChatGPT

f97cf44 over 1 year ago

raw

history blame

14.2 kB

	# Copyright (c) Facebook, Inc. and its affiliates.
	import cv2
	import numpy as np
	import torch
	import torch.nn.functional as F
	import os

	# Random per-category palette: 1300 bright colours (every channel in the
	# upper 60%-100% of the 0-255 range), shaped (1300, 1, 1, 3) so a slice of
	# it broadcasts against a (C, H, W, 1) heat map.
	COLORS = np.random.rand(1300, 3)
	COLORS = ((COLORS * 0.4 + 0.6) * 255).astype(np.uint8)
	COLORS = COLORS.reshape(1300, 1, 1, 3)

	def _get_color_image(heatmap):
	    '''
	    Collapse a multi-channel heat map into one colour image.
	    heatmap: C x H x W numpy array
	    Returns an H x W x 3 uint8 array. A single-channel input is rendered
	    in grey/white; otherwise channel c is tinted with COLORS[c] and the
	    per-pixel maximum over channels is kept.
	    '''
	    num_channels = heatmap.shape[0]
	    # Trailing singleton axis lets the map broadcast against RGB triples.
	    expanded = heatmap.reshape(
	        num_channels, heatmap.shape[1], heatmap.shape[2], 1)
	    if num_channels == 1:
	        white = np.ones((1, 1, 1, 3), np.uint8)
	        tinted = expanded * white * 255
	    else:
	        tinted = expanded * COLORS[:num_channels]
	    return tinted.max(axis=0).astype(np.uint8)  # H, W, 3

	def _blend_image(image, color_map, a=0.7):
	    '''
	    Alpha-blend one colour map over an image.
	    image: H x W x 3 array
	    color_map: colour image, resized to the image's H x W before blending
	    a: weight of the colour map; the image gets weight (1 - a)
	    Returns the blend clipped to [0, 255] as uint8.
	    '''
	    height, width = image.shape[0], image.shape[1]
	    # cv2.resize takes the target size as (width, height).
	    resized = cv2.resize(color_map, (width, height))
	    mixed = image * (1 - a) + resized * a
	    return np.clip(mixed, 0, 255).astype(np.uint8)

	def _blend_image_heatmaps(image, color_maps, a=0.7):
	    '''
	    Alpha-blend the element-wise maximum of several colour maps over an image.
	    image: H x W x 3 array
	    color_maps: iterable of colour images; each is resized to H x W
	    a: weight of the merged colour maps; the image gets weight (1 - a)
	    Returns the blend clipped to [0, 255] as uint8.
	    '''
	    height, width = image.shape[0], image.shape[1]
	    overlay = np.zeros((height, width, 3), np.float32)
	    for heatmap in color_maps:
	        # cv2.resize takes the target size as (width, height).
	        overlay = np.maximum(overlay, cv2.resize(heatmap, (width, height)))
	    mixed = image * (1 - a) + overlay * a
	    return np.clip(mixed, 0, 255).astype(np.uint8)

	def _decompose_level(x, shapes_per_level, N):
	    '''
	    Split a flattened multi-level tensor back into per-level, per-image maps.
	    x: LNHiWi x C
	    shapes_per_level: L x 2 [(H_i, W_i)], entries must support .int().item()
	    N: number of images
	    Returns a list over levels of lists over images of C x H_i x W_i views.
	    '''
	    flat = x.view(x.shape[0], -1)
	    per_level = []
	    offset = 0  # first row of the current level inside `flat`
	    for level_shape in shapes_per_level:
	        height = level_shape[0].int().item()
	        width = level_shape[1].int().item()
	        area = height * width
	        # Within a level, rows are grouped image by image.
	        per_level.append([
	            flat[offset + area * n:offset + area * (n + 1)].view(
	                height, width, -1).permute(2, 0, 1)
	            for n in range(N)
	        ])
	        offset += area * N
	    return per_level

	def _imagelist_to_tensor(images):
	    '''
	    Stack variable-sized images into one zero-padded batch tensor.
	    images: iterable of 3-D tensors (channels x H x W)
	    Each image is padded on the bottom and right up to the largest height
	    and width in the batch, rounded up to a multiple of 32.
	    Returns an N x C x H' x W' tensor.
	    '''
	    tensors = [img for img in images]
	    tallest = max([t.shape[-2:][0] for t in tensors])
	    widest = max([t.shape[-2:][1] for t in tensors])
	    divisor = 32
	    target_h = ((tallest - 1) // divisor + 1) * divisor
	    target_w = ((widest - 1) // divisor + 1) * divisor
	    padded = []
	    for t in tensors:
	        # F.pad takes (left, right, top, bottom, front, back) for the last three dims.
	        pad = (0, target_w - t.shape[2], 0, target_h - t.shape[1], 0, 0)
	        padded.append(F.pad(t, pad))
	    return torch.stack(padded)


	def _ind2il(ind, shapes_per_level, N):
	    '''
	    Map a flat index into the level-major, image-major LNHiWi layout to
	    the image and level it belongs to.
	    ind: flat index
	    shapes_per_level: L x 2 [(H_i, W_i)]
	    N: number of images
	    Returns (image_index, level_index).
	    '''
	    level = 0
	    consumed = 0  # entries belonging to the levels already skipped
	    area = shapes_per_level[level][0] * shapes_per_level[level][1]
	    while ind - consumed >= N * area:
	        consumed += N * area
	        level += 1
	        area = shapes_per_level[level][0] * shapes_per_level[level][1]
	    image_index = (ind - consumed) // area
	    return image_index, level

	def debug_train(
	        images, gt_instances, flattened_hms, reg_targets, labels, pos_inds,
	        shapes_per_level, locations, strides):
	    '''
	    Interactively show the training targets of every image in a batch.
	    For each image this opens one OpenCV window per level with the
	    ground-truth heat map ('gthm_<level>') and a 'blend' window with the
	    heat maps over the image plus: ground-truth boxes (thick, colour
	    (0, 0, 255)), a marker at every positive location (size grows with the
	    level) and the box decoded from every positive regression target
	    (thin, colour (255, 0, 0)). Blocks on cv2.waitKey() after each image.

	    images: N x 3 x H x W (any iterable of 3 x H x W tensors)
	    gt_instances: per-image objects exposing gt_boxes.tensor, or None
	    flattened_hms: LNHiWi x C
	    reg_targets: LNHiWi x 4 (left, top, right, bottom), in stride units
	    labels: unused
	    pos_inds: flat indices of positive locations
	    shapes_per_level: L x 2 [(H_i, W_i)]
	    locations: per-level location tensors (HiWi x 2); each one is repeated
	        N times and the results are concatenated into LNHiWi x 2
	    strides: per-level scale applied to the regression targets
	    '''
	    reg_inds = torch.nonzero(
	        reg_targets.max(dim=1)[0] > 0).squeeze(1)
	    N = len(images)
	    images = _imagelist_to_tensor(images)
	    locations = torch.cat(
	        [torch.cat([loc] * N, dim=0) for loc in locations], dim=0)
	    gt_hms = _decompose_level(flattened_hms, shapes_per_level, N)
	    # The (image, level) of a flat index does not depend on which image is
	    # being displayed, so resolve each index once instead of once per image.
	    pos_il = [_ind2il(ind, shapes_per_level, N) for ind in pos_inds]
	    reg_il = [_ind2il(ind, shapes_per_level, N) for ind in reg_inds]
	    for i in range(len(images)):
	        image = images[i].detach().cpu().numpy().transpose(1, 2, 0)
	        color_maps = []
	        for l in range(len(gt_hms)):
	            color_map = _get_color_image(
	                gt_hms[l][i].detach().cpu().numpy())
	            color_maps.append(color_map)
	            cv2.imshow('gthm_{}'.format(l), color_map)
	        blend = _blend_image_heatmaps(image.copy(), color_maps)
	        if gt_instances is not None:
	            for bbox in gt_instances[i].gt_boxes.tensor:
	                cv2.rectangle(
	                    blend,
	                    (int(bbox[0]), int(bbox[1])),
	                    (int(bbox[2]), int(bbox[3])),
	                    (0, 0, 255), 3, cv2.LINE_AA)

	        for ind, (image_id, l) in zip(pos_inds, pos_il):
	            if image_id != i:
	                continue
	            loc = locations[ind]
	            cv2.drawMarker(
	                blend, (int(loc[0]), int(loc[1])), (0, 255, 255),
	                markerSize=(l + 1) * 16)

	        for ind, (image_id, l) in zip(reg_inds, reg_il):
	            if image_id != i:
	                continue
	            # Build a scaled copy: an in-place `*=` on the indexed row would
	            # write through to the caller's reg_targets.
	            ltrb = reg_targets[ind] * strides[l]
	            loc = locations[ind]
	            bbox = [(loc[0] - ltrb[0]), (loc[1] - ltrb[1]),
	                    (loc[0] + ltrb[2]), (loc[1] + ltrb[3])]
	            cv2.rectangle(
	                blend,
	                (int(bbox[0]), int(bbox[1])),
	                (int(bbox[2]), int(bbox[3])),
	                (255, 0, 0), 1, cv2.LINE_AA)
	            cv2.circle(blend, (int(loc[0]), int(loc[1])), 2, (255, 0, 0), -1)

	        cv2.imshow('blend', blend)
	        cv2.waitKey()


	def debug_test(
	        images, logits_pred, reg_pred, agn_hm_pred=None, preds=None,
	        vis_thresh=0.3, debug_show_name=False, mult_agn=False):
	    '''
	    Interactively show the predictions for every image in a batch.
	    For each image this opens, per level, a 'predhm_<level>' window with
	    the class heat map and an 'agn_hm_<level>' window with the
	    class-agnostic heat map (when available), then a 'blend' window with
	    the class heat maps over the image and a 'preds' window with the
	    predicted boxes. Blocks on cv2.waitKey() after each image.

	    images: N x 3 x H x W
	    logits_pred: per-level list; entry l is indexed by image and yields a
	        C x H_l x W_l map, or the entries are None
	    reg_pred: unused
	    agn_hm_pred: per-level list of N x 1 x H_l x W_l maps (entries may be
	        None); None or a missing level means nothing is shown
	    preds: per-image prediction objects with `scores`, and either
	        `proposal_boxes` or `pred_boxes`, optionally `pred_classes`;
	        None or a missing image means no boxes are drawn
	    vis_thresh: only boxes scoring above this value are drawn
	    debug_show_name: label each box with its LVIS category name and score
	    mult_agn: multiply the class heat map by the agnostic one before display
	    '''
	    # None replaces the former mutable list defaults; an empty list keeps
	    # the "nothing supplied" meaning without indexing errors.
	    agn_hm_pred = [] if agn_hm_pred is None else agn_hm_pred
	    preds = [] if preds is None else preds
	    cat2name = None
	    if debug_show_name:
	        from detectron2.data.datasets.lvis_v1_categories import LVIS_CATEGORIES
	        cat2name = [x['name'] for x in LVIS_CATEGORIES]
	    font = cv2.FONT_HERSHEY_SIMPLEX
	    for i in range(len(images)):
	        image = images[i].detach().cpu().numpy().transpose(1, 2, 0)
	        pred_image = image.copy().astype(np.uint8)
	        color_maps = []
	        for l in range(len(logits_pred)):
	            agn_hm = agn_hm_pred[l] if l < len(agn_hm_pred) else None
	            if logits_pred[0] is not None:
	                logits = logits_pred[l][i]
	                if mult_agn:
	                    # Multiply into a local so the caller's logits are not
	                    # overwritten by the visualisation.
	                    logits = logits * agn_hm[i]
	                color_map = _get_color_image(
	                    logits.detach().cpu().numpy())
	                color_maps.append(color_map)
	                cv2.imshow('predhm_{}'.format(l), color_map)
	            if agn_hm is not None:
	                agn_hm_ = agn_hm[i, 0, :, :, None].detach().cpu().numpy()
	                agn_hm_ = (agn_hm_ * np.array([255, 255, 255]).reshape(
	                    1, 1, 3)).astype(np.uint8)
	                cv2.imshow('agn_hm_{}'.format(l), agn_hm_)

	        # The boxes do not depend on the level, so they are drawn once per
	        # image rather than once per level.
	        if i < len(preds):
	            pred = preds[i]
	            for j in range(len(pred.scores)):
	                if not pred.scores[j] > vis_thresh:
	                    continue
	                bbox = pred.proposal_boxes[j] \
	                    if pred.has('proposal_boxes') else pred.pred_boxes[j]
	                bbox = bbox.tensor[0].detach().cpu().numpy().astype(np.int32)
	                # Class-agnostic proposals carry no class: use colour 0.
	                cat = int(pred.pred_classes[j]) \
	                    if pred.has('pred_classes') else 0
	                cl = COLORS[cat, 0, 0]
	                cl = (int(cl[0]), int(cl[1]), int(cl[2]))
	                cv2.rectangle(
	                    pred_image, (int(bbox[0]), int(bbox[1])),
	                    (int(bbox[2]), int(bbox[3])),
	                    cl, 2, cv2.LINE_AA)
	                if debug_show_name:
	                    txt = '{}{:.1f}'.format(
	                        cat2name[cat] if cat > 0 else '',
	                        pred.scores[j])
	                    cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
	                    # Filled tag just above the box, then the text on it.
	                    cv2.rectangle(
	                        pred_image,
	                        (int(bbox[0]), int(bbox[1] - cat_size[1] - 2)),
	                        (int(bbox[0] + cat_size[0]), int(bbox[1] - 2)),
	                        cl, -1)
	                    cv2.putText(
	                        pred_image, txt, (int(bbox[0]), int(bbox[1] - 2)),
	                        font, 0.5, (0, 0, 0), thickness=1,
	                        lineType=cv2.LINE_AA)

	        blend = _blend_image_heatmaps(image.copy(), color_maps)
	        cv2.imshow('blend', blend)
	        cv2.imshow('preds', pred_image)
	        cv2.waitKey()

	# Running index used by debug_second_stage to name saved debug images.
	# (A `global` statement is a no-op at module scope, so none is needed here.)
	cnt = 0

	def _draw_text_label(canvas, txt, bbox, color):
	    '''Draw `txt` in black on a filled `color` tag just above the top-left corner of `bbox`.'''
	    font = cv2.FONT_HERSHEY_SIMPLEX
	    cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
	    cv2.rectangle(
	        canvas,
	        (int(bbox[0]), int(bbox[1] - cat_size[1] - 2)),
	        (int(bbox[0] + cat_size[0]), int(bbox[1] - 2)),
	        (int(color[0]), int(color[1]), int(color[2])), -1)
	    cv2.putText(
	        canvas, txt, (int(bbox[0]), int(bbox[1] - 2)),
	        font, 0.5, (0, 0, 0), thickness=1, lineType=cv2.LINE_AA)


	def debug_second_stage(images, instances, proposals=None, vis_thresh=0.3,
	        save_debug=False, debug_show_name=False, image_labels=None,
	        save_debug_path='output/save_debug/',
	        bgr=False):
	    '''
	    Show or save second-stage boxes and proposals for every image in a batch.
	    Boxes come from gt_boxes/gt_classes when an instance has 'gt_boxes'
	    (score fixed to 1), otherwise from pred_boxes/scores/pred_classes.

	    images: iterable of 3 x H x W tensors
	    instances: per-image objects with the fields described above
	    proposals: optional per-image objects with `proposal_boxes`, `scores`
	        or `objectness_logits`, and optionally `selected`
	    vis_thresh: only boxes/proposals scoring above this are drawn;
	        proposals with selected >= 0 are always drawn, highlighted
	    save_debug: write a JPEG into save_debug_path instead of opening
	        windows; the proposal image is written when proposals are given,
	        otherwise the box image
	    debug_show_name: label boxes with their category name
	    image_labels: optional per-image category indices appended to the
	        saved file name
	    save_debug_path: output directory; category names are COCO when the
	        path contains 'COCO', otherwise LVIS
	    bgr: channel-order flag, see the review note in the body
	    '''
	    global cnt
	    image_labels = [] if image_labels is None else image_labels
	    images = _imagelist_to_tensor(images)
	    if 'COCO' in save_debug_path:
	        from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
	        cat2name = [x['name'] for x in COCO_CATEGORIES]
	    else:
	        from detectron2.data.datasets.lvis_v1_categories import LVIS_CATEGORIES
	        cat2name = ['({}){}'.format(x['frequency'], x['name'])
	                    for x in LVIS_CATEGORIES]
	    for i in range(len(images)):
	        image = images[i].detach().cpu().numpy().transpose(
	            1, 2, 0).astype(np.uint8).copy()
	        # NOTE(review): this canvas is channel-flipped when bgr is True,
	        # while the proposal canvas below is flipped when bgr is False.
	        # Behaviour kept as-is; confirm which one is intended.
	        if bgr:
	            image = image[:, :, ::-1].copy()
	        if instances[i].has('gt_boxes'):
	            bboxes = instances[i].gt_boxes.tensor.cpu().numpy()
	            scores = np.ones(bboxes.shape[0])
	            cats = instances[i].gt_classes.cpu().numpy()
	        else:
	            bboxes = instances[i].pred_boxes.tensor.cpu().numpy()
	            scores = instances[i].scores.cpu().numpy()
	            cats = instances[i].pred_classes.cpu().numpy()
	        for j in range(len(bboxes)):
	            if not scores[j] > vis_thresh:
	                continue
	            bbox = bboxes[j]
	            cat = cats[j]
	            cl = COLORS[cat, 0, 0]
	            cl = (int(cl[0]), int(cl[1]), int(cl[2]))
	            cv2.rectangle(
	                image,
	                (int(bbox[0]), int(bbox[1])),
	                (int(bbox[2]), int(bbox[3])),
	                cl, 2, cv2.LINE_AA)
	            if debug_show_name:
	                txt = '{}{:.1f}'.format(
	                    cat2name[cat] if cat > 0 else '',
	                    scores[j])
	                _draw_text_label(image, txt, bbox, cl)

	        if proposals is not None:
	            proposal_image = images[i].detach().cpu().numpy().transpose(
	                1, 2, 0).astype(np.uint8).copy()
	            if not bgr:
	                proposal_image = proposal_image[:, :, ::-1].copy()
	            bboxes = proposals[i].proposal_boxes.tensor.cpu().numpy()
	            if proposals[i].has('scores'):
	                scores = proposals[i].scores.detach().cpu().numpy()
	            else:
	                scores = proposals[i].objectness_logits.detach().cpu().numpy()
	            if proposals[i].has('selected'):
	                selected = proposals[i].selected
	            else:
	                selected = [-1 for _ in range(len(bboxes))]
	            for j in range(len(bboxes)):
	                if not (scores[j] > vis_thresh or selected[j] >= 0):
	                    continue
	                bbox = bboxes[j]
	                cl = (209, 159, 83)
	                th = 2
	                if selected[j] >= 0:
	                    # Selected proposals get a thicker, differently coloured box.
	                    cl = (0, 0, 0xa4)
	                    th = 4
	                cv2.rectangle(
	                    proposal_image,
	                    (int(bbox[0]), int(bbox[1])),
	                    (int(bbox[2]), int(bbox[3])),
	                    cl, th, cv2.LINE_AA)
	                if selected[j] >= 0 and debug_show_name:
	                    cat = selected[j].item()
	                    txt = '{}'.format(cat2name[cat])
	                    _draw_text_label(proposal_image, txt, bbox, cl)

	        if save_debug:
	            cnt = (cnt + 1) % 5000
	            # makedirs also creates missing parents such as the 'output'
	            # part of the default path, and tolerates an existing directory.
	            os.makedirs(save_debug_path, exist_ok=True)
	            save_name = '{}/{:05d}'.format(save_debug_path, cnt)
	            if i < len(image_labels):
	                for x in image_labels[i]:
	                    # Same separator as before (backslash + pipe), written
	                    # with a valid escape sequence.
	                    save_name = save_name + '\\|{}'.format(cat2name[x])
	            save_name = save_name + '.jpg'
	            # Without proposals there is no proposal canvas; save the box image.
	            to_save = proposal_image if proposals is not None else image
	            cv2.imwrite(save_name, to_save)
	        else:
	            cv2.imshow('image', image)
	            if proposals is not None:
	                cv2.imshow('proposals', proposal_image)
	            cv2.waitKey()