Spaces:
Running
Running
import os.path as osp | |
import numpy as np | |
import torch | |
from roma.utils import * | |
from PIL import Image | |
from tqdm import tqdm | |
class ScanNetBenchmark:
    """Relative-pose benchmark over the ScanNet test pairs.

    ``data_root`` is expected to contain ``test.npz`` (providing the
    ``name`` and ``rel_pose`` arrays) and a ``scans_test/<scene>/`` tree with
    ``color/<frame>.jpg`` images and ``intrinsic/intrinsic_color.txt``.
    """

    # Number of sparse correspondences requested from ``model.sample``.
    _NUM_SPARSE_MATCHES = 5000
    # Number of pose estimations per pair, each on a freshly shuffled match set.
    _NUM_RANSAC_RUNS = 5
    # Image size (shorter side, in pixels) that intrinsics/keypoints are scaled to.
    _SHORT_SIDE = 480
    # Error assigned to both translation and rotation when estimation fails.
    _FAILURE_ERROR = 90

    def __init__(self, data_root="data/scannet") -> None:
        """Store the dataset root directory; no files are touched here."""
        self.data_root = data_root

    def _scene_dir(self, scene_name):
        """Return the directory holding one test scene."""
        return osp.join(self.data_root, "scans_test", scene_name)

    def _load_intrinsics(self, scene_name):
        """Read ``intrinsic_color.txt`` of a scene as a 2-D float array.

        Each non-blank line becomes one row of whitespace-separated floats.
        Blank and whitespace-only lines are skipped so that a trailing
        newline or stray spaces cannot produce an empty row.
        """
        path = osp.join(
            self._scene_dir(scene_name), "intrinsic", "intrinsic_color.txt"
        )
        # Context manager closes the handle; the original leaked it per pair.
        with open(path, "r") as handle:
            lines = handle.read().split("\n")
        return np.stack(
            [
                np.array([float(value) for value in line.split()])
                for line in lines
                if line.strip()
            ]
        )

    @staticmethod
    def _image_size(path):
        """Return ``(width, height)`` of an image and close the file again."""
        with Image.open(path) as image:
            return image.size

    @staticmethod
    def _to_pixels(normalized, width, height, offset=0.5):
        """Map normalized ``(x, y)`` coordinates to pixel coordinates.

        ``-1`` maps to ``-offset`` and ``+1`` maps to ``size - offset`` along
        each axis (presumably the model emits coordinates in ``[-1, 1]`` --
        confirm against the matcher).
        """
        return np.stack(
            (
                width * (normalized[:, 0] + 1) / 2 - offset,
                height * (normalized[:, 1] + 1) / 2 - offset,
            ),
            axis=-1,
        )

    def benchmark(self, model, model_name=None):
        """Evaluate ``model`` on every test pair and return pose metrics.

        Args:
            model: Object exposing ``train(False)``, ``match(path_a, path_b)``
                and ``sample(dense_matches, dense_certainty, num)``.
            model_name: Unused; kept for interface compatibility.

        Returns:
            dict with ``auc_5``, ``auc_10``, ``auc_20`` (from ``pose_auc`` at
            thresholds 5/10/20) and ``map_5``, ``map_10``, ``map_20`` (running
            means of the accuracies at 5, 10, 15 and 20).

        Notes:
            Pairs are visited in a random order and matches are shuffled with
            ``np.random``; seed NumPy beforehand for reproducible numbers.
            Each pair contributes exactly ``_NUM_RANSAC_RUNS`` error samples.
        """
        model.train(False)
        with torch.no_grad():
            # Pull the arrays out and close the archive right away.
            with np.load(osp.join(self.data_root, "test.npz")) as archive:
                pairs, rel_pose = archive["name"], archive["rel_pose"]

            intrinsics_cache = {}  # scene name -> intrinsics matrix
            tot_e_pose = []
            pair_inds = np.random.choice(
                len(pairs), size=len(pairs), replace=False
            )
            for pairind in tqdm(pair_inds, smoothing=0.9):
                scene = pairs[pairind]
                scene_name = f"scene0{scene[0]}_00"
                color_dir = osp.join(self._scene_dir(scene_name), "color")
                im_A_path = osp.join(color_dir, f"{scene[2]}.jpg")
                im_B_path = osp.join(color_dir, f"{scene[3]}.jpg")

                # Ground-truth relative pose, stored flattened as 3x4 [R | t].
                T_gt = rel_pose[pairind].reshape(3, 4)
                R, t = T_gt[:3, :3], T_gt[:3, 3]

                # Intrinsics are per scene, so read each file only once.
                if scene_name not in intrinsics_cache:
                    intrinsics_cache[scene_name] = self._load_intrinsics(
                        scene_name
                    )
                K = intrinsics_cache[scene_name]

                w1, h1 = self._image_size(im_A_path)
                w2, h2 = self._image_size(im_B_path)

                dense_matches, dense_certainty = model.match(
                    im_A_path, im_B_path
                )
                sparse_matches, _ = model.sample(
                    dense_matches, dense_certainty, self._NUM_SPARSE_MATCHES
                )
                # NumPy cannot stack tensors living on an accelerator, so move
                # them to host memory first (no-op for plain arrays).
                if isinstance(sparse_matches, torch.Tensor):
                    sparse_matches = sparse_matches.detach().cpu().numpy()

                # Rescale so the shorter image side is _SHORT_SIDE pixels; the
                # intrinsics are multiplied by the same factor (creates a new
                # array, leaving the cached matrix untouched).
                scale1 = self._SHORT_SIDE / min(w1, h1)
                scale2 = self._SHORT_SIDE / min(w2, h2)
                w1, h1 = scale1 * w1, scale1 * h1
                w2, h2 = scale2 * w2, scale2 * h2
                K1 = K * scale1
                K2 = K * scale2

                kpts1 = self._to_pixels(sparse_matches[:, :2], w1, h1)
                kpts2 = self._to_pixels(sparse_matches[:, 2:], w2, h2)

                # Loop-invariant inlier threshold derived from both intrinsics.
                norm_threshold = 0.5 / (
                    np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2]))
                )

                for _ in range(self._NUM_RANSAC_RUNS):
                    shuffling = np.random.permutation(len(kpts1))
                    kpts1 = kpts1[shuffling]
                    kpts2 = kpts2[shuffling]
                    try:
                        R_est, t_est, mask = estimate_pose(
                            kpts1,
                            kpts2,
                            K1,
                            K2,
                            norm_threshold,
                            conf=0.99999,
                        )
                        T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)
                        e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
                    except Exception as e:
                        # Deliberate best-effort: a failed estimation is
                        # scored as the failure error instead of aborting.
                        print(repr(e))
                        e_t, e_R = self._FAILURE_ERROR, self._FAILURE_ERROR
                    # Record each run exactly once. The original appended the
                    # same values a second time, over-weighting some runs.
                    tot_e_pose.append(max(e_t, e_R))

            tot_e_pose = np.array(tot_e_pose)
            thresholds = [5, 10, 20]
            auc = pose_auc(tot_e_pose, thresholds)
            acc_5 = (tot_e_pose < 5).mean()
            acc_10 = (tot_e_pose < 10).mean()
            acc_15 = (tot_e_pose < 15).mean()
            acc_20 = (tot_e_pose < 20).mean()
            map_5 = acc_5
            map_10 = np.mean([acc_5, acc_10])
            map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
            return {
                "auc_5": auc[0],
                "auc_10": auc[1],
                "auc_20": auc[2],
                "map_5": map_5,
                "map_10": map_10,
                "map_20": map_20,
            }