Spaces:

CVPR
/

regionclip-demo

Runtime error

regionclip-demo / detectron2 /utils /video_visualizer.py

jwyang

first commit

4121bec almost 2 years ago

No virus

8.89 kB

	# Copyright (c) Facebook, Inc. and its affiliates.
	import numpy as np
	import pycocotools.mask as mask_util

	from detectron2.utils.visualizer import (
	ColorMode,
	Visualizer,
	_create_text_labels,
	_PanopticPrediction,
	)

	from .colormap import random_color


	class _DetectedInstance:
	    """
	    Lightweight record of one object detected in a video frame.

	    Records are kept between frames so that the color given to an object
	    can be handed over to the matching object in later frames.

	    Attributes:
	        label (int):
	        bbox (tuple[float]):
	        mask_rle (dict):
	        color (tuple[float]): RGB colors in range (0, 1)
	        ttl (int): time-to-live for the instance. For example, if ttl=2,
	            the instance color can be transferred to objects in the next two frames.
	    """

	    # Fixed attribute set: instances carry no per-object __dict__.
	    __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"]

	    def __init__(self, label, bbox, mask_rle, color, ttl):
	        # Identity / geometry of the detection.
	        self.label, self.bbox, self.mask_rle = label, bbox, mask_rle
	        # Display color and remaining lifetime.
	        self.color, self.ttl = color, ttl


	class VideoVisualizer:
	    """
	    Draw predictions on successive video frames.

	    Every frame is rendered with a fresh ``Visualizer``; the only state kept
	    between calls is the list of recently seen instances, which is used to
	    give the same object the same color from one frame to the next.
	    """

	    def __init__(self, metadata, instance_mode=ColorMode.IMAGE):
	        """
	        Args:
	            metadata (MetadataCatalog): image metadata.
	            instance_mode (ColorMode): how instances are rendered. Only
	                ``ColorMode.IMAGE`` and ``ColorMode.IMAGE_BW`` are accepted.
	        """
	        self.metadata = metadata
	        # Instances remembered from previous frames (see `_assign_colors`).
	        self._old_instances = []
	        assert instance_mode in [
	            ColorMode.IMAGE,
	            ColorMode.IMAGE_BW,
	        ], "Other mode not supported yet."
	        self._instance_mode = instance_mode

	    def draw_instance_predictions(self, frame, predictions):
	        """
	        Draw instance-level prediction results on an image.

	        Args:
	            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
	            predictions (Instances): the output of an instance detection/segmentation
	                model. Following fields will be used to draw:
	                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

	        Returns:
	            output (VisImage): image object with visualizations.
	        """
	        frame_visualizer = Visualizer(frame, self.metadata)
	        num_instances = len(predictions)
	        if num_instances == 0:
	            return frame_visualizer.output

	        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None
	        scores = predictions.scores if predictions.has("scores") else None
	        classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None
	        keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None

	        # Mask IoU matching is not enabled for instance predictions; masks are
	        # only drawn, and boxes are what is used to track colors.
	        masks = predictions.pred_masks if predictions.has("pred_masks") else None

	        if boxes is not None and classes is not None:
	            detected = [
	                _DetectedInstance(label, box, mask_rle=None, color=None, ttl=8)
	                for label, box in zip(classes, boxes)
	            ]
	            colors = self._assign_colors(detected)
	        else:
	            # Box-based tracking needs both boxes and class labels. Without
	            # them, skip tracking instead of failing on `None[i]` and let
	            # `overlay_instances` choose the colors.
	            colors = None

	        labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None))

	        if self._instance_mode == ColorMode.IMAGE_BW:
	            # any() returns uint8 tensor
	            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
	                (masks.any(dim=0) > 0).numpy() if masks is not None else None
	            )
	            alpha = 0.3
	        else:
	            alpha = 0.5

	        frame_visualizer.overlay_instances(
	            boxes=None if masks is not None else boxes,  # boxes are a bit distracting
	            masks=masks,
	            labels=labels,
	            keypoints=keypoints,
	            assigned_colors=colors,
	            alpha=alpha,
	        )

	        return frame_visualizer.output

	    def draw_sem_seg(self, frame, sem_seg, area_threshold=None):
	        """
	        Draw a semantic segmentation on a frame.

	        Args:
	            frame (ndarray): the image to draw on.
	            sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W),
	                each value is the integer label.
	            area_threshold (Optional[int]): only draw segmentations larger than the threshold

	        Returns:
	            output (VisImage): image object with visualizations.
	        """
	        # don't need to do anything special
	        frame_visualizer = Visualizer(frame, self.metadata)
	        # Forward the caller's threshold; it used to be replaced by None.
	        frame_visualizer.draw_sem_seg(sem_seg, area_threshold=area_threshold)
	        return frame_visualizer.output

	    def draw_panoptic_seg_predictions(
	        self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5
	    ):
	        """
	        Draw panoptic segmentation results on a frame.

	        Semantic ("stuff") segments are drawn first, then instance segments,
	        whose colors are tracked across frames through mask IoU.

	        Args:
	            frame (ndarray): the image to draw on.
	            panoptic_seg: panoptic segmentation, passed to ``_PanopticPrediction``.
	            segments_info: per-segment descriptions, passed to ``_PanopticPrediction``;
	                the "category_id" of each segment is read here.
	            area_threshold (Optional[int]): forwarded to ``draw_binary_mask`` for
	                the semantic segments.
	            alpha (float): blending factor forwarded to the drawing calls.

	        Returns:
	            output (VisImage): image object with visualizations.
	        """
	        frame_visualizer = Visualizer(frame, self.metadata)
	        pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata)

	        if self._instance_mode == ColorMode.IMAGE_BW:
	            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
	                pred.non_empty_mask()
	            )

	        # draw mask for all semantic segments first i.e. "stuff"
	        for mask, sinfo in pred.semantic_masks():
	            category_idx = sinfo["category_id"]
	            try:
	                mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]]
	            except AttributeError:
	                # Metadata without `stuff_colors`: no explicit color is passed.
	                mask_color = None

	            frame_visualizer.draw_binary_mask(
	                mask,
	                color=mask_color,
	                text=self.metadata.stuff_classes[category_idx],
	                alpha=alpha,
	                area_threshold=area_threshold,
	            )

	        all_instances = list(pred.instance_masks())
	        if not all_instances:
	            return frame_visualizer.output
	        # draw mask for all instances second
	        masks, sinfo = list(zip(*all_instances))
	        num_instances = len(masks)
	        # Stack the masks as (H, W, N) in Fortran order before RLE encoding.
	        masks_rles = mask_util.encode(
	            np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F")
	        )
	        assert len(masks_rles) == num_instances

	        category_ids = [x["category_id"] for x in sinfo]
	        detected = [
	            _DetectedInstance(category_id, bbox=None, mask_rle=mask_rle, color=None, ttl=8)
	            for category_id, mask_rle in zip(category_ids, masks_rles)
	        ]
	        colors = self._assign_colors(detected)
	        labels = [self.metadata.thing_classes[k] for k in category_ids]

	        frame_visualizer.overlay_instances(
	            boxes=None,
	            masks=masks,
	            labels=labels,
	            keypoints=None,
	            assigned_colors=colors,
	            alpha=alpha,
	        )
	        return frame_visualizer.output

	    def _assign_colors(self, instances):
	        """
	        Naive tracking heuristics to assign same color to the same instance,
	        will update the internal state of tracked instances.

	        Each remembered instance is matched to the new instance of the same
	        label with which it has the highest IoU; if that IoU exceeds the
	        threshold, the new instance inherits its color. Remembered instances
	        that found no match are kept until their ``ttl`` runs out. New
	        instances left without a color get a random one.

	        Args:
	            instances (list[_DetectedInstance]): detections of the current frame.
	                Their ``color`` attribute is filled in place.

	        Returns:
	            list[tuple[float]]: list of colors.
	        """
	        if not instances:
	            # Nothing to color; leave the remembered instances untouched
	            # rather than failing on `instances[0]`.
	            return []

	        # Compute iou with either boxes or masks.
	        # The builtin `bool` is used because the `np.bool` alias no longer
	        # exists in current NumPy releases.
	        is_crowd = np.zeros((len(instances),), dtype=bool)
	        if instances[0].bbox is None:
	            assert instances[0].mask_rle is not None
	            # use mask iou only when box iou is None
	            # because box seems good enough
	            rles_old = [x.mask_rle for x in self._old_instances]
	            rles_new = [x.mask_rle for x in instances]
	            ious = mask_util.iou(rles_old, rles_new, is_crowd)
	            threshold = 0.5
	        else:
	            # NOTE(review): pycocotools documents box input as [x, y, w, h];
	            # confirm the boxes stored on the instances use that layout.
	            boxes_old = [x.bbox for x in self._old_instances]
	            boxes_new = [x.bbox for x in instances]
	            ious = mask_util.iou(boxes_old, boxes_new, is_crowd)
	            threshold = 0.6
	        if len(ious) == 0:
	            # No usable IoU result (e.g. nothing remembered yet): fall back to
	            # an all-zero matrix of shape (num_old, num_new).
	            ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32")

	        # Only allow matching instances of the same label:
	        for old_idx, old in enumerate(self._old_instances):
	            for new_idx, new in enumerate(instances):
	                if old.label != new.label:
	                    ious[old_idx, new_idx] = 0

	        ious = np.asarray(ious)
	        matched_new_per_old = ious.argmax(axis=1)
	        max_iou_per_old = ious.max(axis=1)

	        # Try to find match for each old instance:
	        extra_instances = []
	        for idx, inst in enumerate(self._old_instances):
	            if max_iou_per_old[idx] > threshold:
	                newidx = matched_new_per_old[idx]
	                # A new instance takes the color of the first old instance
	                # that claims it; later claimants are treated as unmatched.
	                if instances[newidx].color is None:
	                    instances[newidx].color = inst.color
	                    continue
	            # If an old instance does not match any new instances,
	            # keep it for the next frame in case it is just missed by the detector
	            inst.ttl -= 1
	            if inst.ttl > 0:
	                extra_instances.append(inst)

	        # Assign random color to newly-detected instances:
	        for inst in instances:
	            if inst.color is None:
	                inst.color = random_color(rgb=True, maximum=1)
	        self._old_instances = instances[:] + extra_instances
	        return [d.color for d in instances]