# coding=utf-8
# Copyright 2020 The Google AI Perception Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Estimate AIST++ camera parameters."""
import json
import math
import os
import random

from absl import app
from absl import flags
from aist_plusplus.loader import AISTDataset
import aniposelib
import numpy as np
import vedo
import cv2
from scipy.spatial.transform import Rotation as R

FLAGS = flags.FLAGS
flags.DEFINE_string(
    'anno_dir',
    '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/',
    'input local dictionary for AIST++ annotations.')
flags.DEFINE_string(
    'save_dir',
    '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/cameras/',
    'output local dictionary that stores AIST++ camera parameters.')
flags.DEFINE_bool(
    'visualize', False,
    'Whether to visualize the cameras for debugging.')

# Fixed seeds so the sequences sampled in main() are the same on every run.
random.seed(0)
np.random.seed(0)


def plot_cameras(cgroup):
  """Builds vedo objects that draw the world axes and every camera's axes.

  Args:
    cgroup: camera group exposing `.cameras`; each camera must provide
      `rvec`, `tvec` and `name`.

  Returns:
    A list of vedo objects: three arrows for the world frame (x red,
    y green, z blue) followed by, for each camera, three arrows with the same
    colour coding and a text label carrying the camera name.
  """
  points_world = np.array([
      [40., 0., 0.],  # arrow x: red
      [0., 40., 0.],  # arrow y: green
      [0., 0., 40.],  # arrow z: blue
  ])
  colors = ['r', 'g', 'b']
  axes_all = [
      vedo.Arrows([[0, 0, 0]], [points_world[i]]).c(colors[i])
      for i in range(3)]
  for camera in cgroup.cameras:
    rot_mat = cv2.Rodrigues(camera.rvec)[0]
    # Invert once and reuse for both the center and the axis directions.
    rot_inv = np.linalg.inv(rot_mat)
    # NOTE(review): presumably (rvec, tvec) map world -> camera coordinates,
    # which makes -R^-1 t the camera center in world space -- confirm.
    cam_center = - rot_inv.dot(camera.tvec)
    # Row k of the result is rot_inv @ points_world[k].
    points_cam = np.einsum('ij,kj->ki', rot_inv, points_world)
    axes_all += [
        vedo.Arrows([cam_center], [cam_center + points_cam[i]]).c(colors[i])
        for i in range(3)]
    axes_all += [vedo.Text(camera.name, cam_center, s=10)]
  return axes_all


def init_env_cameras():
  """Tries to estimate the environment manually.

  Creates one camera per entry of `AISTDataset.VIEWS` with hand-picked
  intrinsics and extrinsics that serve as the starting point for bundle
  adjustment.

  Returns:
    An `aniposelib.cameras.CameraGroup` holding the initial cameras.
  """
  # Shared hand-picked intrinsics for a 1920x1080 image.
  f = 1600
  cx = 1920 // 2
  cy = 1080 // 2

  cams = []
  for i, view in enumerate(AISTDataset.VIEWS):
    if view == 'c09':
      # 'c09' gets a fixed 180 degree rotation about y and its own offset
      # instead of following the 45-degree-per-view pattern below.
      yaw_deg = 180
      tvec = [0, 170, 500]
    else:
      # Each successive view is rotated a further 45 degrees about y.
      yaw_deg = 180 - 360 // 8 * i
      tvec = [0, 180, 500]
    r1 = R.from_euler('y', yaw_deg, degrees=True)
    r2 = R.from_euler('z', 180, degrees=True)
    rvec = (r1 * r2).as_rotvec()

    matrix = np.array([
        [f, 0, cx],
        [0, f, cy],
        [0, 0, 1],
    ], dtype=np.float32)
    cams.append(
        aniposelib.cameras.Camera(
            matrix=matrix, rvec=rvec, tvec=tvec, name=view,
            size=(1920, 1080)))
  cgroup = aniposelib.cameras.CameraGroup(cams)
  return cgroup


def main(_):
  """Estimates and saves camera parameters for every AIST++ environment.

  For each environment, 2D keypoints of 20 randomly drawn sequences are
  loaded, low-score detections are masked out, the manually initialised
  cameras are refined with bundle adjustment, and the result is written to
  `<save_dir>/<env_name>.json`. With `--visualize`, the cameras and one
  triangulated frame are shown in an interactive vedo window.
  """
  aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir)

  for env_name, seq_names in aist_dataset.mapping_env2seq.items():
    # Init camera parameters
    cgroup = init_env_cameras()

    # Select a set of sequences for optimizing camera parameters.
    # random.choices samples with replacement, so a sequence may repeat.
    seq_names = random.choices(seq_names, k=20)

    # Load 2D keypoints
    # NOTE(review): the indexing below suggests each array is laid out as
    # (view, frame, joint, [x, y, score]) -- confirm against the loader.
    keypoints2d_all = []
    for seq_name in seq_names:
      keypoints2d_raw, _, _ = AISTDataset.load_keypoint2d(
          aist_dataset.keypoint2d_dir, seq_name=seq_name)
      # Special cases
      if seq_name == 'gBR_sBM_cAll_d04_mBR0_ch01':
        keypoints2d_raw[4] = np.nan  # not synced view
      if seq_name == 'gJB_sBM_cAll_d07_mJB3_ch05':
        keypoints2d_raw[6] = np.nan  # size 640x480
      keypoints2d_all.append(keypoints2d_raw)
    keypoints2d_all = np.concatenate(keypoints2d_all, axis=1)

    # Filter keypoints to select those best points
    kpt_thre = 0.5
    ignore_idxs = np.where(keypoints2d_all[:, :, :, 2] < kpt_thre)
    keypoints2d_all[ignore_idxs[0], ignore_idxs[1], ignore_idxs[2], :] = np.nan
    keypoints2d_all = keypoints2d_all[..., 0:2]

    # Apply bundle adjustment and dump the camera parameters
    nviews = keypoints2d_all.shape[0]
    cgroup.bundle_adjust_iter(
        keypoints2d_all.reshape(nviews, -1, 2),
        n_iters=20,
        n_samp_iter=500,
        n_samp_full=5000,
        verbose=True)
    os.makedirs(FLAGS.save_dir, exist_ok=True)
    camera_file = os.path.join(FLAGS.save_dir, f'{env_name}.json')
    with open(camera_file, 'w') as f:
      json.dump([camera.get_dict() for camera in cgroup.cameras], f)

    # visualize the world with one frame
    if FLAGS.visualize:
      # Index 0 along the concatenated axis is the first frame of the first
      # selected sequence, so report that one rather than the loop variable
      # left over from loading (which names the last sequence).
      print("seq_name:", seq_names[0])
      axes_all = plot_cameras(cgroup)
      keypoints3d = cgroup.triangulate(
          keypoints2d_all[:, 0].reshape(nviews, -1, 2)
      ).reshape(-1, 3)
      vedo.show(
          *axes_all, vedo.Points(keypoints3d, r=12),
          interactive=True, axes=True)
      vedo.clear()


if __name__ == '__main__':
  app.run(main)