import argparse
import copy
import os
import sys

import cv2
import numpy as np
import open3d as o3d
import pandas as pd
import torch
from PIL import Image
from tqdm import tqdm

sys.path.append("../../")
from lib.extractMatchTop import getPerspKeypoints, getPerspKeypointsEnsemble, siftMatching
from lib.model_test import D2Net
#### CUDA ####
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if use_cuda else 'cpu')
#### Argument Parsing ####
parser = argparse.ArgumentParser(description='RoRD ICP evaluation on a DiverseView dataset sequence.')
parser.add_argument('--dataset', type=str, default='/scratch/udit/realsense/RoRD_data/preprocessed/',
	help='path to the dataset folder')
parser.add_argument('--sequence', type=str, default='data1')
parser.add_argument(
	'--output_dir', type=str, default='out',
	help='output directory for RT estimates'
)
parser.add_argument(
	'--model_rord', type=str, help='path to the RoRD model for evaluation'
)
parser.add_argument(
	'--model_d2', type=str, help='path to the vanilla D2-Net model for evaluation'
)
parser.add_argument(
	'--model_ens', action='store_true',
	help='use an ensemble of RoRD + D2-Net'
)
parser.add_argument(
	'--sift', action='store_true',
	help='use SIFT feature matching for evaluation'
)
parser.add_argument(
	'--viz3d', action='store_true',
	help='visualize the point cloud registrations'
)
parser.add_argument(
	'--log_interval', type=int, default=9,
	help='interval (in database images) at which matched-image visualizations are saved'
)
parser.add_argument(
	'--camera_file', type=str, default='../../configs/camera.txt',
	help='path to the camera intrinsics file. In order: focal_x, focal_y, center_x, center_y, scaling_factor.'
)
parser.add_argument(
	'--persp', action='store_true', default=False,
	help='match features on the raw perspective images (no homography rectification)'
)
parser.set_defaults(fp16=False)
args = parser.parse_args()
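
# Example invocation (script name and paths are illustrative; adjust to your checkout):
#   python evalRT.py --dataset /path/to/RoRD_data/preprocessed/ --sequence data1 \
#       --model_rord ../../models/rord.pth --output_dir out --viz3d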
if args.model_ens: # Change default paths accordingly for ensemble
	model1_ens = '../../models/rord.pth'
	model2_ens = '../../models/d2net.pth'
def draw_registration_result(source, target, transformation):
	# NOTE: appends to the global `trgSph` list built in the main loop, so the
	# target correspondence spheres are drawn together with the aligned clouds.
	source_temp = copy.deepcopy(source)
	target_temp = copy.deepcopy(target)
	source_temp.transform(transformation)
	trgSph.append(source_temp); trgSph.append(target_temp)
	axis1 = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0])
	axis2 = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0])
	axis2.transform(transformation)
	trgSph.append(axis1); trgSph.append(axis2)
	o3d.visualization.draw_geometries(trgSph)
def readDepth(depthFile):
	depth = Image.open(depthFile)
	if depth.mode != "I":
		raise Exception("Depth image is not in intensity format")
	return np.asarray(depth)
def readCamera(camera):
	with open(camera, "rt") as file:
		contents = file.read().split()
	focalX = float(contents[0])
	focalY = float(contents[1])
	centerX = float(contents[2])
	centerY = float(contents[3])
	scalingFactor = float(contents[4])
	return focalX, focalY, centerX, centerY, scalingFactor
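
# Expected camera.txt layout: five whitespace-separated values in the order
# focal_x focal_y center_x center_y scaling_factor. The values below are
# illustrative, not an actual calibration:
#   615.0 615.0 320.0 240.0 1000.0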
def getPointCloud(rgbFile, depthFile, pts):
	thresh = 15.0  # drop points with depth beyond this threshold (same units as Z)

	depth = readDepth(depthFile)
	rgb = Image.open(rgbFile)

	points = []
	colors = []

	corIdx = [-1]*len(pts)    # index into the point cloud for each keypoint, -1 if no valid depth
	corPts = [None]*len(pts)  # 3D coordinates of each keypoint
	ptIdx = 0

	for v in range(depth.shape[0]):
		for u in range(depth.shape[1]):
			# Pinhole back-projection: (u, v, Z) -> (X, Y, Z) in the camera frame.
			Z = depth[v, u] / scalingFactor
			if Z == 0: continue
			if Z > thresh: continue

			X = (u - centerX) * Z / focalX
			Y = (v - centerY) * Z / focalY

			points.append((X, Y, Z))
			colors.append(rgb.getpixel((u, v)))

			if (u, v) in pts:
				index = pts.index((u, v))
				corIdx[index] = ptIdx
				corPts[index] = (X, Y, Z)

			ptIdx = ptIdx + 1

	points = np.asarray(points)
	colors = np.asarray(colors)

	pcd = o3d.geometry.PointCloud()
	pcd.points = o3d.utility.Vector3dVector(points)
	pcd.colors = o3d.utility.Vector3dVector(colors/255)

	return pcd, corIdx, corPts
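
# Vectorized sketch of the same back-projection, not used by the evaluation
# loop below; assumes the same global intrinsics, a 3-channel RGB image, and
# the 15-unit depth cutoff. Shown only as a faster NumPy equivalent of the
# per-pixel loop above (it does not track keypoint correspondences).
def getPointCloudVectorized(rgbFile, depthFile):
	depth = readDepth(depthFile).astype(np.float64) / scalingFactor
	rgb = np.asarray(Image.open(rgbFile))
	v, u = np.mgrid[0:depth.shape[0], 0:depth.shape[1]]
	mask = (depth > 0) & (depth <= 15.0)
	X = (u - centerX) * depth / focalX
	Y = (v - centerY) * depth / focalY
	pcd = o3d.geometry.PointCloud()
	pcd.points = o3d.utility.Vector3dVector(np.stack([X[mask], Y[mask], depth[mask]], axis=1))
	pcd.colors = o3d.utility.Vector3dVector(rgb[mask] / 255)
	return pcd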
def convertPts(A):
	# A is a pair of parallel coordinate arrays (x values, y values);
	# truncate each coordinate to an integer pixel location.
	X = A[0]; Y = A[1]
	pts = []
	for i in range(len(X)):
		pts.append((int(float(X[i])), int(float(Y[i]))))
	return pts
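
# e.g. convertPts((np.array([10.7, 3.2]), np.array([4.9, 8.0]))) -> [(10, 4), (3, 8)]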
def getSphere(pts):
	# One small red sphere per available 3D correspondence, for visualization.
	sphs = []
	for element in pts:
		if element is not None:
			sphere = o3d.geometry.TriangleMesh.create_sphere(radius=0.03)
			sphere.paint_uniform_color([0.9, 0.2, 0])
			trans = np.identity(4)
			trans[0, 3] = element[0]
			trans[1, 3] = element[1]
			trans[2, 3] = element[2]
			sphere.transform(trans)
			sphs.append(sphere)
	return sphs
def get3dCor(src, trg):
	# Keep only keypoint pairs for which both views produced a valid 3D point.
	corr = []
	for sId, tId in zip(src, trg):
		if sId != -1 and tId != -1:
			corr.append((sId, tId))
	corr = np.asarray(corr)
	return corr
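
# The (srcIdx, trgIdx) pairs returned here are fed to Open3D's
# TransformationEstimationPointToPoint.compute_transformation in the main loop,
# which solves for the least-squares rigid transform (SVD-based) mapping the
# source correspondences onto the target ones.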
if __name__ == "__main__":
	camera_file = args.camera_file
	rgb_csv = args.dataset + args.sequence + '/rtImagesRgb.csv'
	depth_csv = args.dataset + args.sequence + '/rtImagesDepth.csv'

	dir_name = args.output_dir
	os.makedirs(os.path.join(dir_name, 'vis'), exist_ok=True)

	focalX, focalY, centerX, centerY, scalingFactor = readCamera(camera_file)

	df_rgb = pd.read_csv(rgb_csv)
	df_dep = pd.read_csv(depth_csv)

	# Load only the networks whose checkpoints were supplied, so a missing
	# --model_d2 or --model_rord does not construct an uninitialized network.
	model1 = D2Net(model_file=args.model_d2).to(device) if args.model_d2 else None
	model2 = D2Net(model_file=args.model_rord).to(device) if args.model_rord else None
	queryId = 0
	for im_q, dep_q in tqdm(zip(df_rgb['query'], df_dep['query']), total=df_rgb.shape[0]):
		filter_list = []

		dbId = 0
		# Each non-'query' column of the CSVs holds the paths for one database image.
		for im_d, dep_d in tqdm(zip(df_rgb.items(), df_dep.items()), total=df_rgb.shape[1]):
			if im_d[0] == 'query':
				continue

			rgb_name_src = os.path.basename(im_q)
			H_name_src = os.path.splitext(rgb_name_src)[0] + '.npy'
			srcH = args.dataset + args.sequence + '/rgb/' + H_name_src

			rgb_name_trg = os.path.basename(im_d[1][1])
			H_name_trg = os.path.splitext(rgb_name_trg)[0] + '.npy'
			trgH = args.dataset + args.sequence + '/rgb/' + H_name_trg

			srcImg = srcH.replace('.npy', '.jpg')
			trgImg = trgH.replace('.npy', '.jpg')
			if args.model_rord:
				if args.persp:
					srcPts, trgPts, matchImg, _ = getPerspKeypoints(srcImg, trgImg, HFile1=None, HFile2=None, model=model2, device=device)
				else:
					srcPts, trgPts, matchImg, _ = getPerspKeypoints(srcImg, trgImg, srcH, trgH, model2, device)

			elif args.model_d2:
				if args.persp:
					srcPts, trgPts, matchImg, _ = getPerspKeypoints(srcImg, trgImg, HFile1=None, HFile2=None, model=model1, device=device)
				else:
					srcPts, trgPts, matchImg, _ = getPerspKeypoints(srcImg, trgImg, srcH, trgH, model1, device)

			elif args.model_ens:
				# Ensemble: load both RoRD and D2-Net weights from the default paths.
				model1 = D2Net(model_file=model1_ens)
				model1 = model1.to(device)
				model2 = D2Net(model_file=model2_ens)
				model2 = model2.to(device)
				srcPts, trgPts, matchImg = getPerspKeypointsEnsemble(model1, model2, srcImg, trgImg, srcH, trgH, device)

			elif args.sift:
				if args.persp:
					srcPts, trgPts, matchImg, _ = siftMatching(srcImg, trgImg, HFile1=None, HFile2=None, device=device)
				else:
					srcPts, trgPts, matchImg, _ = siftMatching(srcImg, trgImg, srcH, trgH, device)

			else:
				raise ValueError('No matching method selected: pass --model_rord, --model_d2, --model_ens, or --sift.')

			# The matchers return a plain list (rather than an ndarray) when matching
			# fails; record an identity RT so the output keeps one entry per database image.
			if isinstance(srcPts, list):
				print(np.identity(4))
				filter_list.append(np.identity(4))
				continue
			srcPts = convertPts(srcPts)
			trgPts = convertPts(trgPts)

			depth_name_src = os.path.dirname(os.path.dirname(args.dataset)) + '/' + dep_q
			depth_name_trg = os.path.dirname(os.path.dirname(args.dataset)) + '/' + dep_d[1][1]

			srcCld, srcIdx, srcCor = getPointCloud(srcImg, depth_name_src, srcPts)
			trgCld, trgIdx, trgCor = getPointCloud(trgImg, depth_name_trg, trgPts)

			srcSph = getSphere(srcCor)
			trgSph = getSphere(trgCor)
			axis = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5, origin=[0, 0, 0])
			srcSph.append(srcCld); srcSph.append(axis)
			trgSph.append(trgCld); trgSph.append(axis)

			corr = get3dCor(srcIdx, trgIdx)

			# Least-squares rigid transform from the 3D keypoint correspondences.
			p2p = o3d.pipelines.registration.TransformationEstimationPointToPoint()
			trans_init = p2p.compute_transformation(srcCld, trgCld, o3d.utility.Vector2iVector(corr))
			filter_list.append(trans_init)
			if args.viz3d:
				o3d.visualization.draw_geometries(srcSph)
				o3d.visualization.draw_geometries(trgSph)
				draw_registration_result(srcCld, trgCld, trans_init)

			if dbId % args.log_interval == 0:
				cv2.imwrite(os.path.join(dir_name, 'vis', "matchImg.%02d.%02d.jpg" % (queryId, dbId//args.log_interval)), matchImg)
			dbId += 1

		# Stack the per-database-image estimates into a 4 x 4 x N array for this query.
		RT = np.stack(filter_list).transpose(1, 2, 0)
		np.save(os.path.join(dir_name, str(queryId) + '.npy'), RT)
		queryId += 1
		print('Saved RT estimates with shape', RT.shape)
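
# Output layout produced by this script: <output_dir>/<queryId>.npy holds a
# 4 x 4 x N array of estimated source-to-target transforms (one 4x4 RT per
# database image), and <output_dir>/vis/ holds the periodically logged match
# visualizations.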