# NOTE: commented out upload-page residue that is not Python source and broke
# parsing: "meng2003's picture", "Upload 357 files", commit "2d5fdd1".
# coding=utf-8
# Copyright 2020 The Google AI Perception Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Estimate AIST++ camera parameters."""
import json
import math
import os
import random
from absl import app
from absl import flags
from aist_plusplus.loader import AISTDataset
import aniposelib
import numpy as np
import vedo
import cv2
from scipy.spatial.transform import Rotation as R
FLAGS = flags.FLAGS
flags.DEFINE_string(
'anno_dir',
'/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/',
'input local dictionary for AIST++ annotations.')
flags.DEFINE_string(
'save_dir',
'/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/cameras/',
'output local dictionary that stores AIST++ camera parameters.')
flags.DEFINE_bool(
'visualize', False,
'Whether to visualize the cameras for debugging.')
random.seed(0)
np.random.seed(0)
def plot_cameras(cgroup):
points_world = np.array([
[40., 0., 0.], # arrow x: red
[0., 40., 0.], # arrow y: green
[0., 0., 40.], # arrow z: blue
])
colors = ['r', 'g', 'b']
axes_all = [
vedo.Arrows([[0, 0, 0]], [points_world[i]]).c(colors[i])
for i in range(3)]
for camera in cgroup.cameras:
rot_mat = cv2.Rodrigues(camera.rvec)[0]
cam_center = - np.linalg.inv(rot_mat).dot(camera.tvec)
points_cam = np.einsum('ij,kj->ki', np.linalg.inv(rot_mat), points_world)
axes_all += [
vedo.Arrows([cam_center], [cam_center + points_cam[i]]).c(colors[i])
for i in range(3)]
axes_all += [vedo.Text(camera.name, cam_center, s=10)]
return axes_all
def init_env_cameras():
"""Trys to estimate the environment manually."""
cams = []
for i, view in enumerate(AISTDataset.VIEWS):
f = 1600
cx = 1920 // 2
cy = 1080 // 2
if view == 'c09':
r1 = R.from_euler('y', 180, degrees=True)
r2 = R.from_euler('z', 180, degrees=True)
rvec = (r1 * r2).as_rotvec()
tvec = [0, 170, 500]
else:
r1 = R.from_euler('y', 180 - 360 // 8 * i, degrees=True)
r2 = R.from_euler('z', 180, degrees=True)
rvec = (r1 * r2).as_rotvec()
tvec = [0, 180, 500]
matrix = np.array([
[f, 0, cx],
[0, f, cy],
[0, 0, 1],
], dtype=np.float32)
cams.append(
aniposelib.cameras.Camera(
matrix=matrix, rvec=rvec, tvec=tvec, name=view, size=(1920, 1080)))
cgroup = aniposelib.cameras.CameraGroup(cams)
return cgroup
def main(_):
aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir)
for env_name, seq_names in aist_dataset.mapping_env2seq.items():
# Init camera parameters
cgroup = init_env_cameras()
# Select a set of sequences for optimizing camera parameters.
seq_names = random.choices(seq_names, k=20)
# Load 2D keypoints
keypoints2d_all = []
for seq_name in seq_names:
keypoints2d_raw, _, _ = AISTDataset.load_keypoint2d(
aist_dataset.keypoint2d_dir, seq_name=seq_name)
# Special cases
if seq_name == 'gBR_sBM_cAll_d04_mBR0_ch01':
keypoints2d_raw[4] = np.nan # not synced view
if seq_name == 'gJB_sBM_cAll_d07_mJB3_ch05':
keypoints2d_raw[6] = np.nan # size 640x480
keypoints2d_all.append(keypoints2d_raw)
keypoints2d_all = np.concatenate(keypoints2d_all, axis=1)
# Filter keypoints to select those best points
kpt_thre = 0.5
ignore_idxs = np.where(keypoints2d_all[:, :, :, 2] < kpt_thre)
keypoints2d_all[ignore_idxs[0], ignore_idxs[1], ignore_idxs[2], :] = np.nan
keypoints2d_all = keypoints2d_all[..., 0:2]
# Apply bundle adjustment and dump the camera parameters
nviews = keypoints2d_all.shape[0]
cgroup.bundle_adjust_iter(
keypoints2d_all.reshape(nviews, -1, 2),
n_iters=20,
n_samp_iter=500,
n_samp_full=5000,
verbose=True)
os.makedirs(FLAGS.save_dir, exist_ok=True)
camera_file = os.path.join(FLAGS.save_dir, f'{env_name}.json')
with open(camera_file, 'w') as f:
json.dump([camera.get_dict() for camera in cgroup.cameras], f)
# visualize the world with one frame
if FLAGS.visualize:
print("seq_name:", seq_name)
axes_all = plot_cameras(cgroup)
keypoints3d = cgroup.triangulate(
keypoints2d_all[:, 0].reshape(nviews, -1, 2)
).reshape(-1, 3)
vedo.show(
*axes_all, vedo.Points(keypoints3d, r=12),
interactive=True, axes=True)
vedo.clear()
if __name__ == '__main__':
app.run(main)