Spaces:
No application file
No application file
| import bisect | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import matplotlib | |
| from copy import deepcopy | |
| def _compute_conf_thresh(data): | |
| dataset_name = data["dataset_name"][0].lower() | |
| if dataset_name == "scannet": | |
| thr = 5e-4 | |
| elif dataset_name == "megadepth" or dataset_name == "gl3d": | |
| thr = 1e-4 | |
| else: | |
| raise ValueError(f"Unknown dataset: {dataset_name}") | |
| return thr | |
| # --- VISUALIZATION --- # | |
| def make_matching_figure( | |
| img0, | |
| img1, | |
| mkpts0, | |
| mkpts1, | |
| color, | |
| kpts0=None, | |
| kpts1=None, | |
| text=[], | |
| dpi=75, | |
| path=None, | |
| ): | |
| # draw image pair | |
| assert ( | |
| mkpts0.shape[0] == mkpts1.shape[0] | |
| ), f"mkpts0: {mkpts0.shape[0]} v.s. mkpts1: {mkpts1.shape[0]}" | |
| fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi) | |
| axes[0].imshow(img0, cmap="gray") | |
| axes[1].imshow(img1, cmap="gray") | |
| for i in range(2): # clear all frames | |
| axes[i].get_yaxis().set_ticks([]) | |
| axes[i].get_xaxis().set_ticks([]) | |
| for spine in axes[i].spines.values(): | |
| spine.set_visible(False) | |
| plt.tight_layout(pad=1) | |
| if kpts0 is not None: | |
| assert kpts1 is not None | |
| axes[0].scatter(kpts0[:, 0], kpts0[:, 1], c="w", s=2) | |
| axes[1].scatter(kpts1[:, 0], kpts1[:, 1], c="w", s=2) | |
| # draw matches | |
| if mkpts0.shape[0] != 0 and mkpts1.shape[0] != 0: | |
| fig.canvas.draw() | |
| transFigure = fig.transFigure.inverted() | |
| fkpts0 = transFigure.transform(axes[0].transData.transform(mkpts0)) | |
| fkpts1 = transFigure.transform(axes[1].transData.transform(mkpts1)) | |
| fig.lines = [ | |
| matplotlib.lines.Line2D( | |
| (fkpts0[i, 0], fkpts1[i, 0]), | |
| (fkpts0[i, 1], fkpts1[i, 1]), | |
| transform=fig.transFigure, | |
| c=color[i], | |
| linewidth=1, | |
| ) | |
| for i in range(len(mkpts0)) | |
| ] | |
| axes[0].scatter(mkpts0[:, 0], mkpts0[:, 1], c=color, s=4) | |
| axes[1].scatter(mkpts1[:, 0], mkpts1[:, 1], c=color, s=4) | |
| # put txts | |
| txt_color = "k" if img0[:100, :200].mean() > 200 else "w" | |
| fig.text( | |
| 0.01, | |
| 0.99, | |
| "\n".join(text), | |
| transform=fig.axes[0].transAxes, | |
| fontsize=15, | |
| va="top", | |
| ha="left", | |
| color=txt_color, | |
| ) | |
| # save or return figure | |
| if path: | |
| plt.savefig(str(path), bbox_inches="tight", pad_inches=0) | |
| plt.close() | |
| else: | |
| return fig | |
| def _make_evaluation_figure(data, b_id, alpha="dynamic"): | |
| b_mask = data["m_bids"] == b_id | |
| conf_thr = _compute_conf_thresh(data) | |
| img0 = (data["image0"][b_id][0].cpu().numpy() * 255).round().astype(np.int32) | |
| img1 = (data["image1"][b_id][0].cpu().numpy() * 255).round().astype(np.int32) | |
| kpts0 = data["mkpts0_f"][b_mask].cpu().numpy() | |
| kpts1 = data["mkpts1_f"][b_mask].cpu().numpy() | |
| # for megadepth, we visualize matches on the resized image | |
| if "scale0" in data: | |
| kpts0 = kpts0 / data["scale0"][b_id].cpu().numpy()[[1, 0]] | |
| kpts1 = kpts1 / data["scale1"][b_id].cpu().numpy()[[1, 0]] | |
| epi_errs = data["epi_errs"][b_mask].cpu().numpy() | |
| correct_mask = epi_errs < conf_thr | |
| precision = np.mean(correct_mask) if len(correct_mask) > 0 else 0 | |
| n_correct = np.sum(correct_mask) | |
| n_gt_matches = int(data["conf_matrix_gt"][b_id].sum().cpu()) | |
| recall = 0 if n_gt_matches == 0 else n_correct / (n_gt_matches) | |
| # recall might be larger than 1, since the calculation of conf_matrix_gt | |
| # uses groundtruth depths and camera poses, but epipolar distance is used here. | |
| # matching info | |
| if alpha == "dynamic": | |
| alpha = dynamic_alpha(len(correct_mask)) | |
| color = error_colormap(epi_errs, conf_thr, alpha=alpha) | |
| text = [ | |
| f"#Matches {len(kpts0)}", | |
| f"Precision({conf_thr:.2e}) ({100 * precision:.1f}%): {n_correct}/{len(kpts0)}", | |
| f"Recall({conf_thr:.2e}) ({100 * recall:.1f}%): {n_correct}/{n_gt_matches}", | |
| ] | |
| # make the figure | |
| figure = make_matching_figure(img0, img1, kpts0, kpts1, color, text=text) | |
| return figure | |
| def _make_evaluation_figure_offset(data, b_id, alpha="dynamic", side=""): | |
| layer_num = data["predict_flow"][0].shape[0] | |
| b_mask = data["offset_bids" + side] == b_id | |
| conf_thr = 2e-3 # hardcode for scannet(coarse level) | |
| img0 = (data["image0"][b_id][0].cpu().numpy() * 255).round().astype(np.int32) | |
| img1 = (data["image1"][b_id][0].cpu().numpy() * 255).round().astype(np.int32) | |
| figure_list = [] | |
| # draw offset matches in different layers | |
| for layer_index in range(layer_num): | |
| l_mask = data["offset_lids" + side] == layer_index | |
| mask = l_mask & b_mask | |
| kpts0 = data["offset_kpts0_f" + side][mask].cpu().numpy() | |
| kpts1 = data["offset_kpts1_f" + side][mask].cpu().numpy() | |
| epi_errs = data["epi_errs_offset" + side][mask].cpu().numpy() | |
| correct_mask = epi_errs < conf_thr | |
| precision = np.mean(correct_mask) if len(correct_mask) > 0 else 0 | |
| n_correct = np.sum(correct_mask) | |
| n_gt_matches = int(data["conf_matrix_gt"][b_id].sum().cpu()) | |
| recall = 0 if n_gt_matches == 0 else n_correct / (n_gt_matches) | |
| # recall might be larger than 1, since the calculation of conf_matrix_gt | |
| # uses groundtruth depths and camera poses, but epipolar distance is used here. | |
| # matching info | |
| if alpha == "dynamic": | |
| alpha = dynamic_alpha(len(correct_mask)) | |
| color = error_colormap(epi_errs, conf_thr, alpha=alpha) | |
| text = [ | |
| f"#Matches {len(kpts0)}", | |
| f"Precision({conf_thr:.2e}) ({100 * precision:.1f}%): {n_correct}/{len(kpts0)}", | |
| f"Recall({conf_thr:.2e}) ({100 * recall:.1f}%): {n_correct}/{n_gt_matches}", | |
| ] | |
| # make the figure | |
| # import pdb;pdb.set_trace() | |
| figure = make_matching_figure( | |
| deepcopy(img0), deepcopy(img1), kpts0, kpts1, color, text=text | |
| ) | |
| figure_list.append(figure) | |
| return figure | |
| def _make_confidence_figure(data, b_id): | |
| # TODO: Implement confidence figure | |
| raise NotImplementedError() | |
| def make_matching_figures(data, config, mode="evaluation"): | |
| """Make matching figures for a batch. | |
| Args: | |
| data (Dict): a batch updated by PL_LoFTR. | |
| config (Dict): matcher config | |
| Returns: | |
| figures (Dict[str, List[plt.figure]] | |
| """ | |
| assert mode in ["evaluation", "confidence"] # 'confidence' | |
| figures = {mode: []} | |
| for b_id in range(data["image0"].size(0)): | |
| if mode == "evaluation": | |
| fig = _make_evaluation_figure( | |
| data, b_id, alpha=config.TRAINER.PLOT_MATCHES_ALPHA | |
| ) | |
| elif mode == "confidence": | |
| fig = _make_confidence_figure(data, b_id) | |
| else: | |
| raise ValueError(f"Unknown plot mode: {mode}") | |
| figures[mode].append(fig) | |
| return figures | |
| def make_matching_figures_offset(data, config, mode="evaluation", side=""): | |
| """Make matching figures for a batch. | |
| Args: | |
| data (Dict): a batch updated by PL_LoFTR. | |
| config (Dict): matcher config | |
| Returns: | |
| figures (Dict[str, List[plt.figure]] | |
| """ | |
| assert mode in ["evaluation", "confidence"] # 'confidence' | |
| figures = {mode: []} | |
| for b_id in range(data["image0"].size(0)): | |
| if mode == "evaluation": | |
| fig = _make_evaluation_figure_offset( | |
| data, b_id, alpha=config.TRAINER.PLOT_MATCHES_ALPHA, side=side | |
| ) | |
| elif mode == "confidence": | |
| fig = _make_evaluation_figure_offset(data, b_id) | |
| else: | |
| raise ValueError(f"Unknown plot mode: {mode}") | |
| figures[mode].append(fig) | |
| return figures | |
| def dynamic_alpha( | |
| n_matches, milestones=[0, 300, 1000, 2000], alphas=[1.0, 0.8, 0.4, 0.2] | |
| ): | |
| if n_matches == 0: | |
| return 1.0 | |
| ranges = list(zip(alphas, alphas[1:] + [None])) | |
| loc = bisect.bisect_right(milestones, n_matches) - 1 | |
| _range = ranges[loc] | |
| if _range[1] is None: | |
| return _range[0] | |
| return _range[1] + (milestones[loc + 1] - n_matches) / ( | |
| milestones[loc + 1] - milestones[loc] | |
| ) * (_range[0] - _range[1]) | |
| def error_colormap(err, thr, alpha=1.0): | |
| assert alpha <= 1.0 and alpha > 0, f"Invaid alpha value: {alpha}" | |
| x = 1 - np.clip(err / (thr * 2), 0, 1) | |
| return np.clip( | |
| np.stack([2 - x * 2, x * 2, np.zeros_like(x), np.ones_like(x) * alpha], -1), | |
| 0, | |
| 1, | |
| ) | |