|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Visualizes and stores results of a panoptic-deeplab model.""" |
|
import os.path |
|
from typing import Any, Dict, List, Text |
|
|
|
import numpy as np |
|
import tensorflow as tf |
|
|
|
from deeplab2 import common |
|
from deeplab2.data import coco_constants |
|
from deeplab2.data import dataset |
|
from deeplab2.trainer import vis_utils |
|
|
|
|
|
# Filename templates for saved ground-truth visualizations, keyed by an
# integer image ID (zero-padded to six digits).
_IMAGE_FORMAT = '%06d_image'
_CENTER_LABEL_FORMAT = '%06d_center_label'
_OFFSET_LABEL_FORMAT = '%06d_offset_label'
_PANOPTIC_LABEL_FORMAT = '%06d_panoptic_label'
_SEMANTIC_LABEL_FORMAT = '%06d_semantic_label'

# Filename templates for saved model predictions.
_INSTANCE_PREDICTION_FORMAT = '%06d_instance_prediction'
_CENTER_HEATMAP_PREDICTION_FORMAT = '%06d_center_prediction'
_OFFSET_PREDICTION_RGB_FORMAT = '%06d_offset_prediction_rgb'
_PANOPTIC_PREDICTION_FORMAT = '%06d_panoptic_prediction'
_SEMANTIC_PREDICTION_FORMAT = '%06d_semantic_prediction'

# Filename template for the semantic error-analysis map.
_ANALYSIS_FORMAT = '%06d_semantic_error'

# Maps Cityscapes training IDs (tuple index 0..19) to the official evaluation
# IDs used by the test server. The last entry appears to map the remaining
# train ID to 0 (unlabeled) — presumably the void/ignore class; verify against
# the Cityscapes label definitions.
_CITYSCAPES_TRAIN_ID_TO_EVAL_ID = (
    7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33, 0
)
# COCO train-ID -> eval-ID mapping, provided by the project's coco_constants.
_COCO_TRAIN_ID_TO_EVAL_ID = coco_constants.get_id_mapping_inverse()
|
|
|
|
|
def _convert_train_id_to_eval_id(
    prediction: np.ndarray, dataset_name: str) -> np.ndarray:
  """Converts the predicted label for evaluation.

  There are cases where the training labels are not equal to the evaluation
  labels. This function performs the conversion so that the results can be
  submitted to the evaluation server.

  Args:
    prediction: Semantic segmentation prediction.
    dataset_name: Dataset name.

  Returns:
    Semantic segmentation prediction whose labels have been changed.

  Raises:
    ValueError: If the dataset is not supported.
  """
  if 'cityscapes' in dataset_name:
    mapping = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID
  elif 'coco' in dataset_name:
    mapping = _COCO_TRAIN_ID_TO_EVAL_ID
  else:
    raise ValueError(
        'Unsupported dataset %s for converting semantic class IDs.' %
        dataset_name)
  # Build a lookup table with at least 256 entries so that any uint8 train ID
  # indexes safely; train IDs without a mapping are sent to 0.
  table_size = max(256, len(mapping))
  lookup = np.zeros((table_size,), dtype=prediction.dtype)
  lookup[:len(mapping)] = np.asarray(mapping, dtype=prediction.dtype)
  # Fancy indexing applies the table element-wise to the whole prediction.
  return lookup[prediction]
|
|
|
|
|
def _get_fg_mask(label_map: np.ndarray, thing_list: List[int]) -> np.ndarray: |
|
fg_mask = np.zeros_like(label_map, np.bool) |
|
for class_id in np.unique(label_map): |
|
if class_id in thing_list: |
|
fg_mask = np.logical_or(fg_mask, np.equal(label_map, class_id)) |
|
fg_mask = np.expand_dims(fg_mask, axis=2) |
|
return fg_mask.astype(np.int) |
|
|
|
|
|
def store_raw_predictions(predictions: Dict[str, Any],
                          image_filename: tf.Tensor,
                          dataset_info: dataset.DatasetDescriptor,
                          save_dir: Text,
                          sequence: tf.Tensor,
                          raw_panoptic_format='two_channel_png',
                          convert_to_eval=True):
  """Stores raw predictions to the specified path.

  Raw predictions are saved in the specified path with the specified
  `raw_panoptic_format`. For the `raw_panoptic_format`, we currently
  support `two_channel_png`, `three_channel_png` and `two_channel_numpy_array`.
  Note that `two_channel_png` and `three_channel_png` could not encode large
  values of semantic label and instance ID due to limited PNG channel size. In
  such a case, use `raw_panoptic_format` = `two_channel_numpy_array` to save
  the raw predictions as two channel numpy array (i.e., first channel encodes
  the semantic class and the second channel the instance ID).

  Args:
    predictions: A dictionary with string keys and any content. Tensors under
      common.PRED_SEMANTIC_KEY and common.PRED_PANOPTIC_KEY will be stored.
    image_filename: A tf.Tensor containing the image filename.
    dataset_info: A dataset.DatasetDescriptor specifying the dataset.
    save_dir: A path to the folder to write the output to.
    sequence: A tf.Tensor describing the sequence that the image belongs to.
    raw_panoptic_format: A string specifying what format the panoptic output
      should be stored. Supports:
      - 'two_channel_png': The popular format, also supported by the official
        COCO panoptic API (https://github.com/cocodataset/panopticapi), where
        the saved PNG image contains R-channel for semantic labels and
        G-channel for instance IDs.
      - 'three_channel_png': A simple extension of the 'two_channel_png' format,
        and is adopted in some video panoptic segmentation datasets (for
        example, KITTI-STEP and MOTChallenge-STEP), where the saved PNG image
        contains R-channel for semantic labels, G-channel for the values of
        (instance ID // 256), and B-channel for (instance ID % 256).
      - 'two_channel_numpy_array': A more flexible format (unconstrained by the
        PNG channel size), where the panoptic predictions are saved as a numpy
        array in the two channel format (i.e., first channel encodes the
        semantic class and the second channel the instance ID).
    convert_to_eval: A flag specifying whether semantic class IDs should be
      converted to evaluation IDs (e.g., Cityscapes eval IDs). This is useful
      for the official test server evaluation.

  Raises:
    ValueError: An error occurs when semantic label or instance ID is larger
      than the values supported by the 'two_channel_png' or 'three_channel_png'
      format. Or, if the raw_panoptic_format is not supported.
  """
  # Drop the leading batch dimension (batch size is expected to be 1).
  predictions = {key: predictions[key][0] for key in predictions}
  predictions = vis_utils.squeeze_batch_dim_and_convert_to_numpy(predictions)
  image_filename = image_filename.numpy().decode('utf-8')
  # Drop the extension; the savers append their own suffix.
  image_filename = os.path.splitext(image_filename)[0]

  # Save the raw semantic prediction as a single-channel PNG.
  semantic_prediction = predictions[common.PRED_SEMANTIC_KEY]
  if convert_to_eval:
    semantic_prediction = _convert_train_id_to_eval_id(
        semantic_prediction, dataset_info.dataset_name)
  output_folder = os.path.join(save_dir, 'raw_semantic')
  if dataset_info.is_video_dataset:
    # Decode once here; the panoptic branch below reuses the decoded string.
    sequence = sequence.numpy().decode('utf-8')
    output_folder = os.path.join(output_folder, sequence)
  tf.io.gfile.makedirs(output_folder)
  vis_utils.save_annotation(
      semantic_prediction,
      output_folder,
      image_filename,
      add_colormap=False)

  if common.PRED_PANOPTIC_KEY in predictions:
    panoptic_prediction = predictions[common.PRED_PANOPTIC_KEY]
    panoptic_outputs = np.zeros(
        (panoptic_prediction.shape[0], panoptic_prediction.shape[1], 3),
        dtype=panoptic_prediction.dtype)
    # Panoptic label = semantic_label * label_divisor + instance_id.
    predicted_semantic_labels = (
        panoptic_prediction // dataset_info.panoptic_label_divisor)
    if convert_to_eval:
      predicted_semantic_labels = _convert_train_id_to_eval_id(
          predicted_semantic_labels, dataset_info.dataset_name)
    # Use the `panoptic_prediction` alias for consistency with the line above.
    predicted_instance_labels = (
        panoptic_prediction % dataset_info.panoptic_label_divisor)

    output_folder = os.path.join(save_dir, 'raw_panoptic')
    if dataset_info.is_video_dataset:
      output_folder = os.path.join(output_folder, sequence)
    tf.io.gfile.makedirs(output_folder)
    if raw_panoptic_format == 'two_channel_png':
      if np.max(predicted_semantic_labels) > 255:
        raise ValueError('Overflow: Semantic IDs greater 255 are not supported '
                         'for images of 8-bit. Please save output as numpy '
                         'arrays instead.')
      if np.max(predicted_instance_labels) > 255:
        raise ValueError(
            'Overflow: Instance IDs greater 255 could not be encoded by '
            'G channel. Please save output as numpy arrays instead.')
      panoptic_outputs[:, :, 0] = predicted_semantic_labels
      panoptic_outputs[:, :, 1] = predicted_instance_labels
      vis_utils.save_annotation(panoptic_outputs,
                                output_folder,
                                image_filename,
                                add_colormap=False)
    elif raw_panoptic_format == 'three_channel_png':
      if np.max(predicted_semantic_labels) > 255:
        raise ValueError('Overflow: Semantic IDs greater 255 are not supported '
                         'for images of 8-bit. Please save output as numpy '
                         'arrays instead.')
      if np.max(predicted_instance_labels) > 65535:
        raise ValueError(
            'Overflow: Instance IDs greater 65535 could not be encoded by '
            'G and B channels. Please save output as numpy arrays instead.')
      panoptic_outputs[:, :, 0] = predicted_semantic_labels
      panoptic_outputs[:, :, 1] = predicted_instance_labels // 256
      panoptic_outputs[:, :, 2] = predicted_instance_labels % 256
      vis_utils.save_annotation(panoptic_outputs,
                                output_folder,
                                image_filename,
                                add_colormap=False)
    elif raw_panoptic_format == 'two_channel_numpy_array':
      panoptic_outputs[:, :, 0] = predicted_semantic_labels
      panoptic_outputs[:, :, 1] = predicted_instance_labels
      # np.save writes raw bytes, so the file must be opened in binary mode
      # ('wb'); text mode ('w') can corrupt the output or raise a TypeError.
      with tf.io.gfile.GFile(
          os.path.join(output_folder, image_filename + '.npy'), 'wb') as f:
        np.save(f, panoptic_outputs)
    else:
      raise ValueError(
          'Unknown raw_panoptic_format %s.' % raw_panoptic_format)
|
|
|
|
|
def store_predictions(predictions: Dict[str, Any], inputs: Dict[str, Any],
                      image_id: int, dataset_info: dataset.DatasetDescriptor,
                      save_dir: Text):
  """Saves predictions and labels to the specified path."""
  # Strip the batch dimension and convert everything to numpy.
  predictions = {k: v[0] for k, v in predictions.items()}
  predictions = vis_utils.squeeze_batch_dim_and_convert_to_numpy(predictions)
  inputs = {k: v[0] for k, v in inputs.items()}
  del inputs[common.IMAGE_NAME]
  inputs = vis_utils.squeeze_batch_dim_and_convert_to_numpy(inputs)

  thing_list = dataset_info.class_has_instances_list
  label_divisor = dataset_info.panoptic_label_divisor
  colormap_name = dataset_info.colormap

  # Input image.
  image = inputs[common.IMAGE]
  vis_utils.save_annotation(
      image, save_dir, _IMAGE_FORMAT % image_id, add_colormap=False)

  # Semantic segmentation: prediction, then ground truth.
  vis_utils.save_annotation(
      predictions[common.PRED_SEMANTIC_KEY],
      save_dir,
      _SEMANTIC_PREDICTION_FORMAT % image_id,
      add_colormap=True,
      colormap_name=colormap_name)
  vis_utils.save_annotation(
      inputs[common.GT_SEMANTIC_RAW],
      save_dir,
      _SEMANTIC_LABEL_FORMAT % image_id,
      add_colormap=True,
      colormap_name=colormap_name)

  # Instance-center heatmaps (prediction, then ground truth), overlaid on the
  # input image.
  if common.PRED_CENTER_HEATMAP_KEY in predictions:
    for heatmap, name_format in (
        (predictions[common.PRED_CENTER_HEATMAP_KEY],
         _CENTER_HEATMAP_PREDICTION_FORMAT),
        (inputs[common.GT_INSTANCE_CENTER_KEY], _CENTER_LABEL_FORMAT)):
      vis_utils.save_annotation(
          vis_utils.overlay_heatmap_on_image(heatmap, image),
          save_dir,
          name_format % image_id,
          add_colormap=False)

  # Center offsets rendered as optical-flow colors, masked to thing regions.
  if common.PRED_OFFSET_MAP_KEY in predictions:
    offset_prediction_rgb = vis_utils.flow_to_color(
        predictions[common.PRED_OFFSET_MAP_KEY])
    pred_fg_mask = _get_fg_mask(
        predictions[common.PRED_SEMANTIC_KEY], thing_list)
    vis_utils.save_annotation(
        offset_prediction_rgb * pred_fg_mask,
        save_dir,
        _OFFSET_PREDICTION_RGB_FORMAT % image_id,
        add_colormap=False)

    offset_label_rgb = vis_utils.flow_to_color(
        inputs[common.GT_INSTANCE_REGRESSION_KEY])
    gt_fg_mask = _get_fg_mask(inputs[common.GT_SEMANTIC_RAW], thing_list)
    vis_utils.save_annotation(
        offset_label_rgb * gt_fg_mask,
        save_dir,
        _OFFSET_LABEL_FORMAT % image_id,
        add_colormap=False)

  # Predicted instance map.
  if common.PRED_INSTANCE_KEY in predictions:
    vis_utils.save_annotation(
        vis_utils.create_rgb_from_instance_map(
            predictions[common.PRED_INSTANCE_KEY]),
        save_dir,
        _INSTANCE_PREDICTION_FORMAT % image_id,
        add_colormap=False)

  # Panoptic parsing results: prediction, then ground truth.
  if common.PRED_PANOPTIC_KEY in predictions:
    for parsing_result, name_format in (
        (predictions[common.PRED_PANOPTIC_KEY], _PANOPTIC_PREDICTION_FORMAT),
        (inputs[common.GT_PANOPTIC_RAW], _PANOPTIC_LABEL_FORMAT)):
      vis_utils.save_parsing_result(
          parsing_result=parsing_result,
          label_divisor=label_divisor,
          thing_list=thing_list,
          save_dir=save_dir,
          filename=name_format % image_id,
          colormap_name=colormap_name)

  # Semantic error map: 255 where the prediction disagrees with the ground
  # truth on non-ignored pixels, 0 elsewhere.
  label = inputs[common.GT_SEMANTIC_RAW].astype(np.uint8)
  mismatch = predictions[common.PRED_SEMANTIC_KEY] != label
  valid = label != dataset_info.ignore_label
  error_prediction = np.logical_and(mismatch, valid).astype(np.uint8) * 255
  vis_utils.save_annotation(
      error_prediction,
      save_dir,
      _ANALYSIS_FORMAT % image_id,
      add_colormap=False)
|
|