Spaces:
Runtime error
Runtime error
# coding=utf-8 | |
# Copyright 2020 The Google AI Perception Team Authors. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""Estimate AIST++ camera parameters.""" | |
import json | |
import math | |
import os | |
import random | |
from absl import app | |
from absl import flags | |
from aist_plusplus.loader import AISTDataset | |
import aniposelib | |
import numpy as np | |
import vedo | |
import cv2 | |
from scipy.spatial.transform import Rotation as R | |
# Command-line configuration; values are read through FLAGS once absl has
# parsed argv.
FLAGS = flags.FLAGS

flags.DEFINE_string(
    name='anno_dir',
    default='/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/',
    help='input local dictionary for AIST++ annotations.')
flags.DEFINE_string(
    name='save_dir',
    default=(
        '/usr/local/google/home/ruilongli/data/public/'
        'aist_plusplus_final/cameras/'),
    help='output local dictionary that stores AIST++ camera parameters.')
flags.DEFINE_bool(
    name='visualize',
    default=False,
    help='Whether to visualize the cameras for debugging.')

# Fix both random number generators so repeated runs draw the same samples.
random.seed(0)
np.random.seed(0)
def plot_cameras(cgroup):
    """Build the vedo objects that draw the world axes and each camera's axes.

    Args:
      cgroup: camera group; every entry of `cgroup.cameras` must expose
        `rvec`, `tvec` and `name`.

    Returns:
      A list of vedo objects: three arrows at the world origin (x red,
      y green, z blue), then for each camera three arrows anchored at its
      center plus a text label showing the camera name.
    """
    axis_colors = ['r', 'g', 'b']
    axis_tips_world = np.array([
        [40., 0., 0.],  # arrow x: red
        [0., 40., 0.],  # arrow y: green
        [0., 0., 40.],  # arrow z: blue
    ])

    # World frame, drawn from the origin.
    drawables = []
    for tip, color in zip(axis_tips_world, axis_colors):
        drawables.append(vedo.Arrows([[0, 0, 0]], [tip]).c(color))

    for camera in cgroup.cameras:
        rotation = cv2.Rodrigues(camera.rvec)[0]
        rotation_inv = np.linalg.inv(rotation)
        # NOTE(review): presumably rvec/tvec follow the OpenCV world-to-camera
        # convention, in which case this is the camera position in world
        # coordinates -- confirm against aniposelib.
        center = - rotation_inv.dot(camera.tvec)
        # Row k is the inverse rotation applied to the k-th world axis tip.
        axis_tips_cam = np.einsum('ij,kj->ki', rotation_inv, axis_tips_world)
        for tip, color in zip(axis_tips_cam, axis_colors):
            drawables.append(vedo.Arrows([center], [center + tip]).c(color))
        drawables.append(vedo.Text(camera.name, center, s=10))
    return drawables
def init_env_cameras():
    """Create a hand-tuned initial guess of the camera rig.

    One camera is created per entry of `AISTDataset.VIEWS`. All cameras share
    the same intrinsics (focal length 1600, principal point at the center of
    a 1920x1080 image). View 'c09' is rotated 180 degrees about y and placed
    at height 170; every other view is rotated about y in 45-degree steps by
    its index and placed at height 180.

    Returns:
      An `aniposelib.cameras.CameraGroup` holding the initial cameras.
    """
    image_size = (1920, 1080)
    focal = 1600
    center_x = image_size[0] // 2
    center_y = image_size[1] // 2
    # Every camera gets the same 180-degree roll about z.
    roll = R.from_euler('z', 180, degrees=True)

    cameras = []
    for index, view in enumerate(AISTDataset.VIEWS):
        is_c09 = view == 'c09'
        yaw_degrees = 180 if is_c09 else 180 - 360 // 8 * index
        height = 170 if is_c09 else 180

        yaw = R.from_euler('y', yaw_degrees, degrees=True)
        rvec = (yaw * roll).as_rotvec()
        tvec = [0, height, 500]
        # A fresh intrinsics array per camera, so no two cameras share one.
        matrix = np.array([
            [focal, 0, center_x],
            [0, focal, center_y],
            [0, 0, 1],
        ], dtype=np.float32)
        camera = aniposelib.cameras.Camera(
            matrix=matrix, rvec=rvec, tvec=tvec, name=view, size=image_size)
        cameras.append(camera)
    return aniposelib.cameras.CameraGroup(cameras)
def _load_keypoints2d(aist_dataset, seq_names):
    """Load the 2D keypoints of `seq_names` and join them along axis 1.

    Args:
      aist_dataset: dataset object; only its `keypoint2d_dir` is read.
      seq_names: names of the sequences to load, in order.

    Returns:
      The per-sequence keypoint arrays concatenated along axis 1, in the
      order of `seq_names`. Two known-bad views are overwritten with NaN.
    """
    keypoints2d_per_seq = []
    for seq_name in seq_names:
        keypoints2d_raw, _, _ = AISTDataset.load_keypoint2d(
            aist_dataset.keypoint2d_dir, seq_name=seq_name)
        # Special cases
        if seq_name == 'gBR_sBM_cAll_d04_mBR0_ch01':
            keypoints2d_raw[4] = np.nan  # not synced view
        if seq_name == 'gJB_sBM_cAll_d07_mJB3_ch05':
            keypoints2d_raw[6] = np.nan  # size 640x480
        keypoints2d_per_seq.append(keypoints2d_raw)
    return np.concatenate(keypoints2d_per_seq, axis=1)


def main(_):
    """Estimate the camera parameters of every environment and save them.

    For each environment in the dataset: start from the hand-tuned camera
    rig, load 2D keypoints of 20 randomly drawn sequences, drop keypoints
    whose third channel is below 0.5, run iterative bundle adjustment and
    write the cameras to `<save_dir>/<env_name>.json`. With `--visualize`,
    the cameras and one triangulated frame are shown as well.

    Args:
      _: unused positional arguments passed in by `absl.app.run`.
    """
    aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir)
    for env_name, seq_names in aist_dataset.mapping_env2seq.items():
        # Init camera parameters
        cgroup = init_env_cameras()

        # Select a set of sequences for optimizing camera parameters.
        # NOTE(review): random.choices samples WITH replacement, so the same
        # sequence can be drawn more than once. Kept as-is so seeded runs
        # reproduce earlier results; switch to random.sample if unintended.
        seq_names = random.choices(seq_names, k=20)

        # Load 2D keypoints
        keypoints2d_all = _load_keypoints2d(aist_dataset, seq_names)

        # Filter keypoints to select those best points: blank out every
        # keypoint whose third channel is below the threshold, then drop
        # that channel and keep only the first two.
        kpt_thre = 0.5
        keypoints2d_all[keypoints2d_all[:, :, :, 2] < kpt_thre] = np.nan
        keypoints2d_all = keypoints2d_all[..., 0:2]

        # Apply bundle adjustment and dump the camera parameters
        nviews = keypoints2d_all.shape[0]
        cgroup.bundle_adjust_iter(
            keypoints2d_all.reshape(nviews, -1, 2),
            n_iters=20,
            n_samp_iter=500,
            n_samp_full=5000,
            verbose=True)
        os.makedirs(FLAGS.save_dir, exist_ok=True)
        camera_file = os.path.join(FLAGS.save_dir, f'{env_name}.json')
        with open(camera_file, 'w') as fp:
            json.dump([camera.get_dict() for camera in cgroup.cameras], fp)

        # visualize the world with one frame
        if FLAGS.visualize:
            # The frame shown is index 0 along the concatenation axis, which
            # belongs to the first selected sequence, so that name is
            # reported rather than the last one loaded.
            print("seq_name:", seq_names[0])
            axes_all = plot_cameras(cgroup)
            keypoints3d = cgroup.triangulate(
                keypoints2d_all[:, 0].reshape(nviews, -1, 2)
            ).reshape(-1, 3)
            vedo.show(
                *axes_all, vedo.Points(keypoints3d, r=12),
                interactive=True, axes=True)
            vedo.clear()
# Script entry point: absl parses the command-line flags, then calls main.
if __name__ == '__main__':
    app.run(main)