instant-ngp / scripts /nsvf2nerf.py
camenduru's picture
instant-ngp build
7873319
#!/usr/bin/env python3
# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import argparse
import os
import numpy as np
import json
import sys
import math
import cv2
import glob
def parse_args():
    """Parse the command-line options of the NSVF-to-NeRF converter.

    Returns:
        argparse.Namespace with the attributes ``aabb_scale`` (int),
        ``white_transparent`` (bool) and ``black_transparent`` (bool).
    """
    parser = argparse.ArgumentParser(description="convert a dataset from the nsvf paper format to nerf format transforms.json")
    # type=int makes argparse reject non-integer values with a usage error
    # instead of letting a later int() conversion fail with a traceback.
    parser.add_argument("--aabb_scale", type=int, default=1, help="large scene scale factor")
    parser.add_argument("--white_transparent", action="store_true", help="White is transparent")
    # The help text previously repeated "White is transparent" for this flag.
    parser.add_argument("--black_transparent", action="store_true", help="Black is transparent")
    return parser.parse_args()
def variance_of_laplacian(image):
    """Return the variance of the Laplacian of *image*.

    The Laplacian is computed by ``cv2.Laplacian`` with 64-bit float output
    (``cv2.CV_64F``); the variance is taken over every element of the result.
    """
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return laplacian.var()
def sharpness(imagePath):
    """Return a sharpness score for the image stored at *imagePath*.

    The image is loaded with ``cv2.imread``, converted from BGR to grayscale,
    and scored with ``variance_of_laplacian``.

    Raises:
        OSError: if ``cv2.imread`` could not load the file. ``imread`` signals
            failure by returning ``None`` rather than raising, which would
            otherwise surface as an obscure error inside ``cv2.cvtColor``.
    """
    image = cv2.imread(imagePath)
    if image is None:
        raise OSError(f"could not read image file: {imagePath}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return variance_of_laplacian(gray)
def _read_floats(path, first_line_only=False):
    """Return the whitespace-separated numbers in a text file as a tuple of floats.

    Only the first line is parsed when *first_line_only* is true. The file is
    closed before returning, and ``str.split()`` without a separator tolerates
    repeated spaces, tabs and blank lines.
    """
    with open(path, "r") as handle:
        text = handle.readline() if first_line_only else handle.read()
    return tuple(map(float, text.split()))


def _nsvf_pose_to_nerf(pose, centroid, scale):
    """Build the ``transform_matrix`` written to the output from 16 pose values.

    The values are reshaped to 4x4, the translation column is re-centred on
    *centroid* and multiplied by *scale*, and then the axis flips/swaps below
    are applied in order.
    """
    c2w = np.array(pose).reshape(4, 4)
    c2w[0:3, 3] -= centroid
    c2w[0:3, 3] *= scale
    c2w[0:3, 2] *= -1  # flip the y and z axis
    c2w[0:3, 1] *= -1
    c2w = c2w[[0, 2, 1, 3], :]  # swap y and z rows
    c2w[2, :] *= -1  # flip whole world upside down
    return c2w


if __name__ == "__main__":
    # Converts the dataset in the current directory (rgb/, pose/, bbox.txt,
    # intrinsics.txt) into transforms_*.json files.
    args = parse_args()
    AABB_SCALE = int(args.aabb_scale)
    SKIP_EARLY = 0  # not referenced in this script body; kept in case other code reads it
    IMAGE_FOLDER = "."

    # Images are grouped by filename prefix: 0_*, 1_* and 2_*.
    img_files = [
        sorted(glob.glob(os.path.join(IMAGE_FOLDER, "rgb", f"{prefix}_*.png")))
        for prefix in (0, 1, 2)
    ]
    if not img_files[0]:
        # The first 0_* image is needed below to determine resolution and background.
        print("no images found matching", os.path.join(IMAGE_FOLDER, "rgb", "0_*.png"))
        sys.exit(1)

    # Only the first line of bbox.txt is used; its first six numbers are read
    # as (min x, min y, min z, max x, max y, max z).
    bbox = _read_floats("bbox.txt", first_line_only=True)
    if len(bbox) < 6:
        print("dont understand bbox file", bbox)
        sys.exit(1)

    image = cv2.imread(img_files[0][0], cv2.IMREAD_UNCHANGED)
    if image is None:
        # cv2.imread returns None instead of raising when loading fails.
        print("cant read image", img_files[0][0])
        sys.exit(1)
    if image.ndim < 3:
        print("unsupported image (expected 3 or 4 channels)", img_files[0][0])
        sys.exit(1)
    w = image.shape[1]
    h = image.shape[0]
    channels = image.shape[2]

    if channels == 3 or (channels == 4 and image[0][0][3] != 0):
        # Opaque image: infer the background colour from the top-left pixel,
        # falling back to the top-right pixel.
        for corner in (image[0][0], image[0][w - 1]):
            if (corner[:3] == 0).all():
                print("black opaque background detected")
                args.black_transparent = True
                break
            if (corner[:3] == 255).all():
                print("white opaque background detected")
                args.white_transparent = True
                break
        else:
            print("cant detect background")
            sys.exit(1)  # non-zero status so callers can see the failure
    elif channels == 4:
        print("transparent alpha channel detected, first pixel alpha = ", image[0][0][3])

    els = _read_floats("intrinsics.txt")
    print(els)
    if len(els) == 11:
        # 11 values: a single focal length followed by the principal point.
        fl_x = els[0]
        fl_y = els[0]
        cx = els[1]
        cy = els[2]
    elif len(els) == 16:
        # 16 values: read as a row-major 4x4 matrix.
        fl_x = els[0]
        fl_y = els[5]
        cx = els[2]
        cy = els[6]
    else:
        print("dont understand intrinsics file", els)
        sys.exit(1)

    # fl = 0.5 * w / tan(0.5 * angle_x);
    angle_x = math.atan(w / (fl_x * 2)) * 2
    angle_y = math.atan(h / (fl_y * 2)) * 2
    fovx = angle_x * 180 / math.pi
    fovy = angle_y * 180 / math.pi
    # Distortion coefficients are always written as zero.
    k1 = 0
    k2 = 0
    p1 = 0
    p2 = 0
    print(f"camera:\n\tres={w,h}\n\tcenter={cx,cy}\n\tfocal={fl_x,fl_y}\n\tfov={fovx,fovy}\n\tk={k1,k2} p={p1,p2}")

    centroid = [(bbox[0]+bbox[3])*0.5, (bbox[1]+bbox[4])*0.5, (bbox[2]+bbox[5])*0.5]
    print("bbox is ", bbox)
    print("centroid is ", centroid)
    radius = [(bbox[3]-bbox[0])*0.5, (bbox[4]-bbox[1])*0.5, (bbox[5]-bbox[2])*0.5]
    # Camera positions are scaled so the largest bbox half-extent becomes 0.5.
    scale = 0.5 / np.max(radius)
    print("radius is ", np.max(radius))

    # With 2_* images present the groups are train/val/test; otherwise the
    # 1_* group is written as the test set and nothing else is emitted.
    if img_files[2]:
        out_paths = ["transforms_train.json", "transforms_val.json", "transforms_test.json"]
    else:
        out_paths = ["transforms_train.json", "transforms_test.json"]

    for split_files, OUT_PATH in zip(img_files, out_paths):
        out = {
            "camera_angle_x": angle_x,
            "camera_angle_y": angle_y,
            "fl_x": fl_x,
            "fl_y": fl_y,
            "k1": k1,
            "k2": k2,
            "p1": p1,
            "p2": p2,
            "cx": cx,
            "cy": cy,
            "w": w,
            "h": h,
            "scale": 1,  # the computed scale is already baked into the poses
            "white_transparent": args.white_transparent,
            "black_transparent": args.black_transparent,
            "aabb_scale": AABB_SCALE,
            "frames": [],
        }
        for img_f in split_files:
            # The pose file shares the image's base name: pose/<name>.txt
            stem = os.path.splitext(os.path.basename(img_f))[0]
            pose_f = os.path.join(IMAGE_FOLDER, "pose", stem + ".txt")
            c2w = _nsvf_pose_to_nerf(_read_floats(pose_f), centroid, scale)
            out["frames"].append({
                "file_path": img_f,
                "sharpness": sharpness(img_f),
                "transform_matrix": c2w.tolist(),  # plain lists so json can serialise it
            })
        nframes = len(out["frames"])
        print(nframes, "frames")
        print(f"writing {OUT_PATH}...")
        with open(OUT_PATH, "w") as outfile:
            json.dump(out, outfile, indent=2)