# KyanChen's picture
# Upload 303 files
# 4d0eb62
# raw
# history blame
# 33.5 kB
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Tuple, Union
import mmcv
import numpy as np
import torch
import torch.nn.functional as F
from mmengine.dataset import BaseDataset
from mmengine.dist import master_only
from mmengine.visualization import Visualizer
from mmengine.visualization.utils import img_from_canvas
from mmpretrain.registry import VISUALIZERS
from mmpretrain.structures import DataSample
from .utils import create_figure, get_adaptive_scale
@VISUALIZERS.register_module()
class UniversalVisualizer(Visualizer):
    """Universal Visualizer for multiple tasks.

    Args:
        name (str): Name of the instance. Defaults to 'visualizer'.
        image (np.ndarray, optional): the origin image to draw. The format
            should be RGB. Defaults to None.
        vis_backends (list, optional): Visual backend config list.
            Defaults to None.
        save_dir (str, optional): Save file dir for all storage backends.
            If it is None, the backend storage will not save any data.
        fig_save_cfg (dict): Keyword parameters of figure for saving.
            Defaults to empty dict.
        fig_show_cfg (dict): Keyword parameters of figure for showing.
            Defaults to empty dict.
    """

    # Base matplotlib text style shared by every ``visualize_*`` method.
    # Individual calls layer an adaptive font size below it and the caller's
    # ``text_cfg`` on top of it.
    DEFAULT_TEXT_CFG = {
        'family': 'monospace',
        'color': 'white',
        'bbox': dict(facecolor='black', alpha=0.5, boxstyle='Round'),
        'verticalalignment': 'top',
        'horizontalalignment': 'left',
    }
    @master_only
    def visualize_cls(self,
                      image: np.ndarray,
                      data_sample: DataSample,
                      classes: Optional[Sequence[str]] = None,
                      draw_gt: bool = True,
                      draw_pred: bool = True,
                      draw_score: bool = True,
                      resize: Optional[int] = None,
                      rescale_factor: Optional[float] = None,
                      text_cfg: dict = dict(),
                      show: bool = False,
                      wait_time: float = 0,
                      out_file: Optional[str] = None,
                      name: str = '',
                      step: int = 0) -> np.ndarray:
        """Visualize image classification result.

        This method will draw an text box on the input image to visualize the
        information about image classification, like the ground-truth label and
        prediction label.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation of the image.
            classes (Sequence[str], optional): The categories names.
                Defaults to None.
            draw_gt (bool): Whether to draw ground-truth labels.
                Defaults to True.
            draw_pred (bool): Whether to draw prediction labels.
                Defaults to True.
            draw_score (bool): Whether to draw the prediction scores
                of prediction categories. Defaults to True.
            resize (int, optional): Resize the short edge of the image to the
                specified length before visualization. Defaults to None.
            rescale_factor (float, optional): Rescale the image by the rescale
                factor before visualization. Defaults to None.
            text_cfg (dict): Extra text setting, which accepts
                arguments of :meth:`mmengine.Visualizer.draw_texts`.
                Defaults to an empty dict.
            show (bool): Whether to display the drawn image in a window, please
                confirm your are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which means
                "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        if self.dataset_meta is not None:
            # Fall back to the category names recorded in the dataset
            # metainfo when ``classes`` is not explicitly given.
            classes = classes or self.dataset_meta.get('classes', None)

        if resize is not None:
            # Resize so the SHORT edge equals ``resize`` (aspect ratio kept,
            # integer division on the other edge).
            h, w = image.shape[:2]
            if w < h:
                image = mmcv.imresize(image, (resize, resize * h // w))
            else:
                image = mmcv.imresize(image, (resize * w // h, resize))
        elif rescale_factor is not None:
            image = mmcv.imrescale(image, rescale_factor)

        texts = []
        self.set_image(image)

        if draw_gt and 'gt_label' in data_sample:
            idx = data_sample.gt_label.tolist()
            class_labels = [''] * len(idx)
            if classes is not None:
                class_labels = [f' ({classes[i]})' for i in idx]
            labels = [str(idx[i]) + class_labels[i] for i in range(len(idx))]
            prefix = 'Ground truth: '
            # Indent continuation lines so labels align under the first one.
            texts.append(prefix + ('\n' + ' ' * len(prefix)).join(labels))

        if draw_pred and 'pred_label' in data_sample:
            idx = data_sample.pred_label.tolist()
            score_labels = [''] * len(idx)
            class_labels = [''] * len(idx)
            if draw_score and 'pred_score' in data_sample:
                score_labels = [
                    f', {data_sample.pred_score[i].item():.2f}' for i in idx
                ]

            if classes is not None:
                class_labels = [f' ({classes[i]})' for i in idx]

            labels = [
                str(idx[i]) + score_labels[i] + class_labels[i]
                for i in range(len(idx))
            ]
            prefix = 'Prediction: '
            texts.append(prefix + ('\n' + ' ' * len(prefix)).join(labels))

        img_scale = get_adaptive_scale(image.shape[:2])
        # Merge precedence: adaptive font size < class defaults < user cfg.
        text_cfg = {
            'size': int(img_scale * 7),
            **self.DEFAULT_TEXT_CFG,
            **text_cfg,
        }
        self.ax_save.text(
            img_scale * 5,
            img_scale * 5,
            '\n'.join(texts),
            **text_cfg,
        )
        drawn_img = self.get_image()

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            # (RGB -> BGR flip because mmcv.imwrite expects BGR)
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
    @master_only
    def visualize_image_retrieval(self,
                                  image: np.ndarray,
                                  data_sample: DataSample,
                                  prototype_dataset: BaseDataset,
                                  topk: int = 1,
                                  draw_score: bool = True,
                                  resize: Optional[int] = None,
                                  text_cfg: dict = dict(),
                                  show: bool = False,
                                  wait_time: float = 0,
                                  out_file: Optional[str] = None,
                                  name: str = '',
                                  step: int = 0) -> np.ndarray:
        """Visualize image retrieval result.

        This method will draw the input image and the images retrieved from
        the prototype dataset.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation of the image.
            prototype_dataset (:obj:`BaseDataset`): The prototype dataset.
                It should have `get_data_info` method and return a dict
                includes `img_path`.
            topk (int): To visualize the topk matching items. Defaults to 1.
            draw_score (bool): Whether to draw the match scores of the
                retrieved images. Defaults to True.
            resize (int, optional): Resize the long edge of the image to the
                specified length before visualization. Defaults to None.
            text_cfg (dict): Extra text setting, which accepts arguments of
                :func:`plt.text`. Defaults to an empty dict.
            show (bool): Whether to display the drawn image in a window, please
                confirm your are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which means
                "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        text_cfg = {**self.DEFAULT_TEXT_CFG, **text_cfg}
        if resize is not None:
            # A tuple scale makes mmcv.imrescale fit the image within
            # (resize, resize) while keeping the aspect ratio, i.e. the long
            # edge is bounded by ``resize``.
            image = mmcv.imrescale(image, (resize, resize))

        match_scores, indices = torch.topk(data_sample.pred_score, k=topk)

        # Layout: row 0 holds the query image across all columns, row 1 holds
        # the top-k retrieved images side by side.
        figure = create_figure(margin=True)
        gs = figure.add_gridspec(2, topk)
        query_plot = figure.add_subplot(gs[0, :])
        query_plot.axis(False)
        query_plot.imshow(image)

        for k, (score, sample_idx) in enumerate(zip(match_scores, indices)):
            sample = prototype_dataset.get_data_info(sample_idx.item())
            # mmcv.imread returns BGR; flip to RGB for matplotlib.
            value_image = mmcv.imread(sample['img_path'])[..., ::-1]
            value_plot = figure.add_subplot(gs[1, k])
            value_plot.axis(False)
            value_plot.imshow(value_image)
            if draw_score:
                value_plot.text(
                    5,
                    5,
                    f'{score:.2f}',
                    **text_cfg,
                )
        drawn_img = img_from_canvas(figure.canvas)
        self.set_image(drawn_img)

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            # (RGB -> BGR flip because mmcv.imwrite expects BGR)
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
    def add_mask_to_image(
        self,
        image: np.ndarray,
        data_sample: DataSample,
        resize: Union[int, Tuple[int]] = 224,
        color: Union[str, Tuple[int]] = 'black',
        alpha: Union[int, float] = 0.8,
    ) -> np.ndarray:
        """Draw the binary mask stored in ``data_sample.mask`` over ``image``.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation carrying ``mask``.
            resize (int | Tuple[int]): Resize the input image to the specified
                shape. Defaults to 224.
            color (str | Tuple[int]): The color of the binary mask.
                Defaults to "black".
            alpha (int | float): The transparency of the mask. Defaults to 0.8.

        Returns:
            np.ndarray: The image with the mask drawn on it.
        """
        if isinstance(resize, int):
            resize = (resize, resize)

        image = mmcv.imresize(image, resize)
        self.set_image(image)

        if isinstance(data_sample.mask, np.ndarray):
            # NOTE: stores the converted tensor back, so this mutates
            # ``data_sample`` in place.
            data_sample.mask = torch.tensor(data_sample.mask)
        # Upsample the mask to the image size with nearest-neighbor
        # interpolation so it stays binary.
        # assumes data_sample.mask is 2-D (H, W) — TODO confirm
        mask = data_sample.mask.float()[None, None, ...]
        mask_ = F.interpolate(mask, image.shape[:2], mode='nearest')[0, 0]

        self.draw_binary_masks(mask_.bool(), colors=color, alphas=alpha)

        drawn_img = self.get_image()
        return drawn_img
    @master_only
    def visualize_masked_image(self,
                               image: np.ndarray,
                               data_sample: DataSample,
                               resize: Union[int, Tuple[int]] = 224,
                               color: Union[str, Tuple[int]] = 'black',
                               alpha: Union[int, float] = 0.8,
                               show: bool = False,
                               wait_time: float = 0,
                               out_file: Optional[str] = None,
                               name: str = '',
                               step: int = 0) -> np.ndarray:
        """Visualize masked image.

        This method will draw an image with binary mask.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation of the image.
            resize (int | Tuple[int]): Resize the input image to the specified
                shape. Defaults to 224.
            color (str | Tuple[int]): The color of the binary mask.
                Defaults to "black".
            alpha (int | float): The transparency of the mask. Defaults to 0.8.
            show (bool): Whether to display the drawn image in a window, please
                confirm your are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which means
                "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        # Delegate the actual drawing; this wrapper only handles the
        # show/save bookkeeping shared by all visualize_* methods.
        drawn_img = self.add_mask_to_image(
            image=image,
            data_sample=data_sample,
            resize=resize,
            color=color,
            alpha=alpha)

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            # (RGB -> BGR flip because mmcv.imwrite expects BGR)
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
@master_only
def visualize_image_caption(self,
image: np.ndarray,
data_sample: DataSample,
resize: Optional[int] = None,
text_cfg: dict = dict(),
show: bool = False,
wait_time: float = 0,
out_file: Optional[str] = None,
name: Optional[str] = '',
step: int = 0) -> None:
"""Visualize image caption result.
This method will draw the input image and the images caption.
Args:
image (np.ndarray): The image to draw. The format should be RGB.
data_sample (:obj:`DataSample`): The annotation of the image.
resize (int, optional): Resize the long edge of the image to the
specified length before visualization. Defaults to None.
text_cfg (dict): Extra text setting, which accepts arguments of
:func:`plt.text`. Defaults to an empty dict.
show (bool): Whether to display the drawn image in a window, please
confirm your are able to access the graphical interface.
Defaults to False.
wait_time (float): The display time (s). Defaults to 0, which means
"forever".
out_file (str, optional): Extra path to save the visualization
result. If specified, the visualizer will only save the result
image to the out_file and ignore its storage backends.
Defaults to None.
name (str): The image identifier. It's useful when using the
storage backends of the visualizer to save or display the
image. Defaults to an empty string.
step (int): The global step value. It's useful to record a
series of visualization results for the same image with the
storage backends. Defaults to 0.
Returns:
np.ndarray: The visualization image.
"""
text_cfg = {**self.DEFAULT_TEXT_CFG, **text_cfg}
if resize is not None:
h, w = image.shape[:2]
if w < h:
image = mmcv.imresize(image, (resize, resize * h // w))
else:
image = mmcv.imresize(image, (resize * w // h, resize))
self.set_image(image)
img_scale = get_adaptive_scale(image.shape[:2])
text_cfg = {
'size': int(img_scale * 7),
**self.DEFAULT_TEXT_CFG,
**text_cfg,
}
self.ax_save.text(
img_scale * 5,
img_scale * 5,
data_sample.get('pred_caption'),
wrap=True,
**text_cfg,
)
drawn_img = self.get_image()
if show:
self.show(drawn_img, win_name=name, wait_time=wait_time)
if out_file is not None:
# save the image to the target file instead of vis_backends
mmcv.imwrite(drawn_img[..., ::-1], out_file)
else:
self.add_image(name, drawn_img, step=step)
return drawn_img
@master_only
def visualize_vqa(self,
image: np.ndarray,
data_sample: DataSample,
resize: Optional[int] = None,
text_cfg: dict = dict(),
show: bool = False,
wait_time: float = 0,
out_file: Optional[str] = None,
name: Optional[str] = '',
step: int = 0) -> None:
"""Visualize visual question answering result.
This method will draw the input image, question and answer.
Args:
image (np.ndarray): The image to draw. The format should be RGB.
data_sample (:obj:`DataSample`): The annotation of the image.
resize (int, optional): Resize the long edge of the image to the
specified length before visualization. Defaults to None.
text_cfg (dict): Extra text setting, which accepts arguments of
:func:`plt.text`. Defaults to an empty dict.
show (bool): Whether to display the drawn image in a window, please
confirm your are able to access the graphical interface.
Defaults to False.
wait_time (float): The display time (s). Defaults to 0, which means
"forever".
out_file (str, optional): Extra path to save the visualization
result. If specified, the visualizer will only save the result
image to the out_file and ignore its storage backends.
Defaults to None.
name (str): The image identifier. It's useful when using the
storage backends of the visualizer to save or display the
image. Defaults to an empty string.
step (int): The global step value. It's useful to record a
series of visualization results for the same image with the
storage backends. Defaults to 0.
Returns:
np.ndarray: The visualization image.
"""
text_cfg = {**self.DEFAULT_TEXT_CFG, **text_cfg}
if resize is not None:
h, w = image.shape[:2]
if w < h:
image = mmcv.imresize(image, (resize, resize * h // w))
else:
image = mmcv.imresize(image, (resize * w // h, resize))
self.set_image(image)
img_scale = get_adaptive_scale(image.shape[:2])
text_cfg = {
'size': int(img_scale * 7),
**self.DEFAULT_TEXT_CFG,
**text_cfg,
}
text = (f'Q: {data_sample.get("question")}\n'
f'A: {data_sample.get("pred_answer")}')
self.ax_save.text(
img_scale * 5,
img_scale * 5,
text,
wrap=True,
**text_cfg,
)
drawn_img = self.get_image()
if show:
self.show(drawn_img, win_name=name, wait_time=wait_time)
if out_file is not None:
# save the image to the target file instead of vis_backends
mmcv.imwrite(drawn_img[..., ::-1], out_file)
else:
self.add_image(name, drawn_img, step=step)
return drawn_img
@master_only
def visualize_visual_grounding(self,
image: np.ndarray,
data_sample: DataSample,
resize: Optional[int] = None,
text_cfg: dict = dict(),
show: bool = False,
wait_time: float = 0,
out_file: Optional[str] = None,
name: Optional[str] = '',
line_width: Union[int, float] = 3,
bbox_color: Union[str, tuple] = 'green',
step: int = 0) -> None:
"""Visualize visual grounding result.
This method will draw the input image, bbox and the object.
Args:
image (np.ndarray): The image to draw. The format should be RGB.
data_sample (:obj:`DataSample`): The annotation of the image.
resize (int, optional): Resize the long edge of the image to the
specified length before visualization. Defaults to None.
text_cfg (dict): Extra text setting, which accepts arguments of
:func:`plt.text`. Defaults to an empty dict.
show (bool): Whether to display the drawn image in a window, please
confirm your are able to access the graphical interface.
Defaults to False.
wait_time (float): The display time (s). Defaults to 0, which means
"forever".
out_file (str, optional): Extra path to save the visualization
result. If specified, the visualizer will only save the result
image to the out_file and ignore its storage backends.
Defaults to None.
name (str): The image identifier. It's useful when using the
storage backends of the visualizer to save or display the
image. Defaults to an empty string.
step (int): The global step value. It's useful to record a
series of visualization results for the same image with the
storage backends. Defaults to 0.
Returns:
np.ndarray: The visualization image.
"""
text_cfg = {**self.DEFAULT_TEXT_CFG, **text_cfg}
gt_bboxes = data_sample.get('gt_bboxes')
pred_bboxes = data_sample.get('pred_bboxes')
if resize is not None:
h, w = image.shape[:2]
if w < h:
image, w_scale, h_scale = mmcv.imresize(
image, (resize, resize * h // w), return_scale=True)
else:
image, w_scale, h_scale = mmcv.imresize(
image, (resize * w // h, resize), return_scale=True)
pred_bboxes[:, ::2] *= w_scale
pred_bboxes[:, 1::2] *= h_scale
if gt_bboxes is not None:
gt_bboxes[:, ::2] *= w_scale
gt_bboxes[:, 1::2] *= h_scale
self.set_image(image)
# Avoid the line-width limit in the base classes.
self._default_font_size = 1e3
self.draw_bboxes(
pred_bboxes, line_widths=line_width, edge_colors=bbox_color)
if gt_bboxes is not None:
self.draw_bboxes(
gt_bboxes, line_widths=line_width, edge_colors='blue')
img_scale = get_adaptive_scale(image.shape[:2])
text_cfg = {
'size': int(img_scale * 7),
**self.DEFAULT_TEXT_CFG,
**text_cfg,
}
text_positions = pred_bboxes[:, :2] + line_width
for i in range(pred_bboxes.size(0)):
self.ax_save.text(
text_positions[i, 0] + line_width,
text_positions[i, 1] + line_width,
data_sample.get('text'),
**text_cfg,
)
drawn_img = self.get_image()
if show:
self.show(drawn_img, win_name=name, wait_time=wait_time)
if out_file is not None:
# save the image to the target file instead of vis_backends
mmcv.imwrite(drawn_img[..., ::-1], out_file)
else:
self.add_image(name, drawn_img, step=step)
return drawn_img
    @master_only
    def visualize_t2i_retrieval(self,
                                text: str,
                                data_sample: DataSample,
                                prototype_dataset: BaseDataset,
                                topk: int = 1,
                                draw_score: bool = True,
                                text_cfg: dict = dict(),
                                fig_cfg: dict = dict(),
                                show: bool = False,
                                wait_time: float = 0,
                                out_file: Optional[str] = None,
                                name: str = '',
                                step: int = 0) -> np.ndarray:
        """Visualize Text-To-Image retrieval result.

        This method will draw the input text and the images retrieved from the
        prototype dataset.

        Args:
            text (str): The query text, drawn as the figure title.
            data_sample (:obj:`DataSample`): The annotation of the image.
            prototype_dataset (:obj:`BaseDataset`): The prototype dataset.
                It should have `get_data_info` method and return a dict
                includes `img_path`.
            topk (int): To visualize the topk matching items. Defaults to 1.
            draw_score (bool): Whether to draw the match scores of the
                retrieved images. Defaults to True.
            text_cfg (dict): Extra text setting, which accepts arguments of
                :func:`plt.text`. Defaults to an empty dict.
            fig_cfg (dict): Extra figure setting, which accepts arguments of
                :func:`plt.Figure`. Defaults to an empty dict.
            show (bool): Whether to display the drawn image in a window, please
                confirm your are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which means
                "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        text_cfg = {**self.DEFAULT_TEXT_CFG, **text_cfg}

        match_scores, indices = torch.topk(data_sample.pred_score, k=topk)

        figure = create_figure(margin=True, **fig_cfg)
        # Query text on top, top-k retrieved images in a single row below.
        figure.suptitle(text)
        gs = figure.add_gridspec(1, topk)

        for k, (score, sample_idx) in enumerate(zip(match_scores, indices)):
            sample = prototype_dataset.get_data_info(sample_idx.item())
            # mmcv.imread returns BGR; flip to RGB for matplotlib.
            value_image = mmcv.imread(sample['img_path'])[..., ::-1]
            value_plot = figure.add_subplot(gs[0, k])
            value_plot.axis(False)
            value_plot.imshow(value_image)
            if draw_score:
                value_plot.text(
                    5,
                    5,
                    f'{score:.2f}',
                    **text_cfg,
                )
        drawn_img = img_from_canvas(figure.canvas)
        self.set_image(drawn_img)

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            # (RGB -> BGR flip because mmcv.imwrite expects BGR)
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
@master_only
def visualize_i2t_retrieval(self,
image: np.ndarray,
data_sample: DataSample,
prototype_dataset: Sequence[str],
topk: int = 1,
draw_score: bool = True,
resize: Optional[int] = None,
text_cfg: dict = dict(),
show: bool = False,
wait_time: float = 0,
out_file: Optional[str] = None,
name: str = '',
step: int = 0) -> None:
"""Visualize Image-To-Text retrieval result.
This method will draw the input image and the texts retrieved from the
prototype dataset.
Args:
image (np.ndarray): The image to draw. The format should be RGB.
data_sample (:obj:`DataSample`): The annotation of the image.
prototype_dataset (Sequence[str]): The prototype dataset.
It should be a list of texts.
topk (int): To visualize the topk matching items. Defaults to 1.
draw_score (bool): Whether to draw the prediction scores
of prediction categories. Defaults to True.
resize (int, optional): Resize the short edge of the image to the
specified length before visualization. Defaults to None.
text_cfg (dict): Extra text setting, which accepts
arguments of :meth:`mmengine.Visualizer.draw_texts`.
Defaults to an empty dict.
show (bool): Whether to display the drawn image in a window, please
confirm your are able to access the graphical interface.
Defaults to False.
wait_time (float): The display time (s). Defaults to 0, which means
"forever".
out_file (str, optional): Extra path to save the visualization
result. If specified, the visualizer will only save the result
image to the out_file and ignore its storage backends.
Defaults to None.
name (str): The image identifier. It's useful when using the
storage backends of the visualizer to save or display the
image. Defaults to an empty string.
step (int): The global step value. It's useful to record a
series of visualization results for the same image with the
storage backends. Defaults to 0.
Returns:
np.ndarray: The visualization image.
"""
if resize is not None:
h, w = image.shape[:2]
if w < h:
image = mmcv.imresize(image, (resize, resize * h // w))
else:
image = mmcv.imresize(image, (resize * w // h, resize))
self.set_image(image)
match_scores, indices = torch.topk(data_sample.pred_score, k=topk)
texts = []
for score, sample_idx in zip(match_scores, indices):
text = prototype_dataset[sample_idx.item()]
if draw_score:
text = f'{score:.2f} ' + text
texts.append(text)
img_scale = get_adaptive_scale(image.shape[:2])
text_cfg = {
'size': int(img_scale * 7),
**self.DEFAULT_TEXT_CFG,
**text_cfg,
}
self.ax_save.text(
img_scale * 5,
img_scale * 5,
'\n'.join(texts),
**text_cfg,
)
drawn_img = self.get_image()
if show:
self.show(drawn_img, win_name=name, wait_time=wait_time)
if out_file is not None:
# save the image to the target file instead of vis_backends
mmcv.imwrite(drawn_img[..., ::-1], out_file)
else:
self.add_image(name, drawn_img, step=step)
return drawn_img