Spaces:

karolmajek
/

maxdeeplab

Runtime error

App Files Files Community

maxdeeplab / utils /coco_tools.py

karolmajek

from https://huggingface.co/spaces/akhaliq/deeplab2

0924f30 over 3 years ago

raw

history blame contribute delete

8.43 kB

	# coding=utf-8
	# Copyright 2021 The Deeplab2 Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Wrappers and conversions for third party pycocotools.

	This is derived from code in the Tensorflow Object Detection API:
	https://github.com/tensorflow/models/tree/master/research/object_detection

	Huang et al. "Speed/accuracy trade-offs for modern convolutional object
	detectors" CVPR 2017.
	"""

	from typing import Any, Collection, Dict, List, Optional, Union

	import numpy as np
	from pycocotools import mask


	# Tuple of (metric name, index) pairs; the index of each entry equals its
	# position in the listing below (0 through 11).
	# NOTE(review): the indices presumably address the summary-statistics vector
	# produced by the COCO evaluator -- confirm against the consumer.
	COCO_METRIC_NAMES_AND_INDEX = tuple(
	    (metric_name, position)
	    for position, metric_name in enumerate((
	        'Precision/mAP',
	        'Precision/mAP@.50IOU',
	        'Precision/mAP@.75IOU',
	        'Precision/mAP (small)',
	        'Precision/mAP (medium)',
	        'Precision/mAP (large)',
	        'Recall/AR@1',
	        'Recall/AR@10',
	        'Recall/AR@100',
	        'Recall/AR@100 (small)',
	        'Recall/AR@100 (medium)',
	        'Recall/AR@100 (large)',
	    ))
	)


	def _ConvertBoxToCOCOFormat(box: np.ndarray) -> List[float]:
	    """Converts a box in [ymin, xmin, ymax, xmax] format to COCO format.

	    This is a utility function for converting from our internal
	    [ymin, xmin, ymax, xmax] convention to the convention used by the COCO
	    API, i.e., [xmin, ymin, width, height].

	    Args:
	      box: a [ymin, xmin, ymax, xmax] numpy array. Only the first four
	        entries are read.

	    Returns:
	      a list of Python floats representing [xmin, ymin, width, height], where
	      width = xmax - xmin and height = ymax - ymin.
	    """
	    # Index explicitly (rather than unpack) so inputs with extra trailing
	    # entries are still accepted.
	    ymin, xmin, ymax, xmax = box[0], box[1], box[2], box[3]
	    return [
	        float(xmin),
	        float(ymin),
	        float(xmax - xmin),
	        float(ymax - ymin),
	    ]


	def ExportSingleImageGroundtruthToCoco(
	        image_id: Union[int, str],
	        next_annotation_id: int,
	        category_id_set: Collection[int],
	        groundtruth_boxes: np.ndarray,
	        groundtruth_classes: np.ndarray,
	        groundtruth_masks: np.ndarray,
	        groundtruth_is_crowd: Optional[np.ndarray] = None
	) -> List[Dict[str, Any]]:
	    """Exports groundtruth of a single image to COCO format.

	    This function converts groundtruth annotations represented as numpy
	    arrays to dictionaries that can be ingested by the COCO evaluation API.
	    Note that the image_ids provided here must match the ones given to the
	    corresponding detection export. We assume that boxes, classes and masks
	    are in correspondence - that is: groundtruth_boxes[i, :],
	    groundtruth_classes[i] and groundtruth_masks[i] are associated with the
	    same groundtruth annotation.

	    In the exported result, "area" fields are always set to the foreground
	    area of the mask.

	    Args:
	      image_id: a unique image identifier either of type integer or string.
	        It is copied into the output unchanged.
	      next_annotation_id: integer specifying the first id to use for the
	        groundtruth annotations. Entry i of the inputs receives id
	        next_annotation_id + i, so ids of entries dropped by category_id_set
	        are skipped rather than reused.
	      category_id_set: A set of valid class ids. Groundtruth with classes not
	        in category_id_set are dropped.
	      groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4]
	        in [ymin, xmin, ymax, xmax] order.
	      groundtruth_classes: numpy array (int) with shape [num_gt_boxes]
	      groundtruth_masks: uint8 numpy array of shape [num_gt_boxes,
	        image_height, image_width] containing groundtruth masks.
	      groundtruth_is_crowd: optional numpy array (int) with shape
	        [num_gt_boxes] indicating whether groundtruth boxes are crowd. When
	        omitted, every annotation is exported with iscrowd == 0.

	    Returns:
	      a list of groundtruth annotations for a single image in the COCO
	      format. Each annotation is a dict with keys 'id', 'image_id',
	      'category_id', 'bbox', 'segmentation', 'area' and 'iscrowd'.

	    Raises:
	      ValueError: if groundtruth_classes is not rank 1, groundtruth_boxes is
	        not of shape [N, 4], groundtruth_is_crowd (when given) is not rank
	        1, or the inputs disagree on the number of annotations.
	    """
	    if len(groundtruth_classes.shape) != 1:
	        raise ValueError('groundtruth_classes is '
	                         'expected to be of rank 1.')
	    if len(groundtruth_boxes.shape) != 2:
	        raise ValueError('groundtruth_boxes is expected to be of '
	                         'rank 2.')
	    if groundtruth_boxes.shape[1] != 4:
	        raise ValueError('groundtruth_boxes should have '
	                         'shape[1] == 4.')
	    num_boxes = groundtruth_classes.shape[0]
	    if num_boxes != groundtruth_boxes.shape[0]:
	        raise ValueError(
	            'Corresponding entries in groundtruth_classes, '
	            'and groundtruth_boxes should have '
	            'compatible shapes (i.e., agree on the 0th dimension). '
	            'Classes shape: %d. Boxes shape: %d. Image ID: %s' % (
	                groundtruth_classes.shape[0],
	                groundtruth_boxes.shape[0], image_id))
	    # Validate the remaining per-annotation inputs up front so a mismatch is
	    # reported as the documented ValueError instead of surfacing as an
	    # IndexError half-way through the export loop.
	    if len(groundtruth_masks) != num_boxes:
	        raise ValueError(
	            'groundtruth_masks should contain one mask per groundtruth '
	            'box. Masks length: %d. Boxes length: %d. Image ID: %s' % (
	                len(groundtruth_masks), num_boxes, image_id))
	    has_is_crowd = groundtruth_is_crowd is not None
	    if has_is_crowd:
	        if len(groundtruth_is_crowd.shape) != 1:
	            raise ValueError(
	                'groundtruth_is_crowd is expected to be of rank 1.')
	        if groundtruth_is_crowd.shape[0] != num_boxes:
	            raise ValueError(
	                'groundtruth_is_crowd should contain one entry per '
	                'groundtruth box. Is-crowd length: %d. Boxes length: %d. '
	                'Image ID: %s' % (
	                    groundtruth_is_crowd.shape[0], num_boxes, image_id))

	    groundtruth_list = []
	    for i in range(num_boxes):
	        if groundtruth_classes[i] not in category_id_set:
	            continue
	        # The mask is made Fortran-contiguous before being handed to the
	        # pycocotools encoder.
	        segment = mask.encode(np.asfortranarray(groundtruth_masks[i]))
	        groundtruth_list.append({
	            # Offset by the input index (not the output position) so ids
	            # stay stable regardless of which categories are filtered out.
	            'id': next_annotation_id + i,
	            'image_id': image_id,
	            'category_id': int(groundtruth_classes[i]),
	            'bbox': _ConvertBoxToCOCOFormat(groundtruth_boxes[i, :]),
	            'segmentation': segment,
	            # Scalars are converted to built-in ints, like category_id, so
	            # the annotation does not carry numpy scalar types.
	            'area': int(mask.area(segment)),
	            'iscrowd': int(groundtruth_is_crowd[i]) if has_is_crowd else 0,
	        })
	    return groundtruth_list


	def ExportSingleImageDetectionMasksToCoco(
	        image_id: Union[int, str], category_id_set: Collection[int],
	        detection_masks: np.ndarray, detection_scores: np.ndarray,
	        detection_classes: np.ndarray) -> List[Dict[str, Any]]:
	    """Exports detection masks of a single image to COCO format.

	    This function converts detections represented as numpy arrays to
	    dictionaries that can be ingested by the COCO evaluation API. We assume
	    that detection_masks, detection_scores, and detection_classes are in
	    correspondence - that is: detection_masks[i, :], detection_classes[i]
	    and detection_scores[i] are associated with the same annotation.

	    Args:
	      image_id: unique image identifier either of type integer or string.
	        It is copied into the output unchanged.
	      category_id_set: A set of valid class ids. Detections with classes not
	        in category_id_set are dropped.
	      detection_masks: uint8 numpy array of shape [num_detections,
	        image_height, image_width] containing detection_masks.
	      detection_scores: float numpy array of shape [num_detections]
	        containing scores for detection masks.
	      detection_classes: integer numpy array of shape [num_detections]
	        containing the classes for detection masks.

	    Returns:
	      a list of detection mask annotations for a single image in the COCO
	      format. Each annotation is a dict with keys 'image_id', 'category_id',
	      'segmentation' and 'score'.

	    Raises:
	      ValueError: if detection_classes or detection_scores is not rank 1, or
	        if detection_masks, detection_scores and detection_classes do not
	        all have the same length.
	    """
	    if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
	        raise ValueError(
	            'All entries in detection_classes and detection_scores '
	            'expected to be of rank 1.')
	    num_boxes = detection_classes.shape[0]
	    if not num_boxes == len(detection_masks) == detection_scores.shape[0]:
	        raise ValueError(
	            'Corresponding entries in detection_classes, '
	            'detection_scores and detection_masks should have '
	            'compatible lengths and shapes '
	            'Classes length: %d. Masks length: %d. '
	            'Scores length: %d' % (
	                detection_classes.shape[0], len(detection_masks),
	                detection_scores.shape[0]))
	    # Lengths were verified equal above, so zip() cannot silently truncate.
	    return [
	        {
	            'image_id': image_id,
	            'category_id': int(class_id),
	            # The mask is made Fortran-contiguous before being handed to
	            # the pycocotools encoder.
	            'segmentation': mask.encode(np.asfortranarray(instance_mask)),
	            'score': float(score),
	        }
	        for class_id, instance_mask, score in zip(
	            detection_classes, detection_masks, detection_scores)
	        if class_id in category_id_set
	    ]