File size: 5,008 Bytes
2d5fdd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# coding=utf-8
# Copyright 2020 The Google AI Perception Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Estimate AIST++ camera parameters."""
import json
import math
import os
import random

from absl import app
from absl import flags
from aist_plusplus.loader import AISTDataset
import aniposelib
import numpy as np
import vedo
import cv2
from scipy.spatial.transform import Rotation as R

# Command-line flags for input annotations, output location, and debugging.
# NOTE(review): 'dictionary' in the help strings below reads like a typo for
# 'directory', but help text is runtime output — left unchanged here.
FLAGS = flags.FLAGS
flags.DEFINE_string(
    'anno_dir',
    '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/',
    'input local dictionary for AIST++ annotations.')
flags.DEFINE_string(
    'save_dir',
    '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/cameras/',
    'output local dictionary that stores AIST++ camera parameters.')
flags.DEFINE_bool(
    'visualize', False,
    'Whether to visualize the cameras for debugging.')
# Fix both RNGs so sequence selection (and any stochastic optimization steps
# that consume these generators) is reproducible across runs.
random.seed(0)
np.random.seed(0)


def plot_cameras(cgroup):
  """Builds vedo actors showing the world axes and each camera in `cgroup`.

  For the world origin and for every camera center, three arrows are drawn
  (x: red, y: green, z: blue); each camera also gets a text label with its
  name. Returns the flat list of vedo actors, ready for `vedo.show`.
  """
  # Axis tip points, expressed in world coordinates (40-unit arrows).
  axis_tips = np.array([
      [40., 0., 0.],  # x axis: red
      [0., 40., 0.],  # y axis: green
      [0., 0., 40.],  # z axis: blue
  ])
  axis_colors = ('r', 'g', 'b')
  origin = [0, 0, 0]

  actors = []
  # World coordinate frame at the origin.
  for tip, color in zip(axis_tips, axis_colors):
    actors.append(vedo.Arrows([origin], [tip]).c(color))

  for camera in cgroup.cameras:
    # rvec/tvec map world -> camera; invert the rotation to go camera -> world.
    rot_world_from_cam = np.linalg.inv(cv2.Rodrigues(camera.rvec)[0])
    # Camera center in world coordinates: C = -R^{-1} t.
    center = -rot_world_from_cam.dot(camera.tvec)
    # Rotate the axis tips into this camera's orientation (world frame).
    tips_world = np.einsum('ij,kj->ki', rot_world_from_cam, axis_tips)
    for tip, color in zip(tips_world, axis_colors):
      actors.append(vedo.Arrows([center], [center + tip]).c(color))
    actors.append(vedo.Text(camera.name, center, s=10))
  return actors


def init_env_cameras():
  """Builds a hand-tuned initial guess for the AIST++ camera rig.

  Every view shares the same guessed intrinsics (f=1600, principal point at
  the image center of a 1920x1080 frame). Extrinsics place camera 'c09' facing
  the stage head-on and spread the remaining cameras at 45-degree yaw steps
  around it; a 180-degree roll flips the frame into image convention.

  Returns:
    An `aniposelib.cameras.CameraGroup` with one camera per AISTDataset view.
  """
  focal = 1600
  width, height = 1920, 1080
  # Roll by 180 degrees about z, composed with each camera's yaw.
  roll_flip = R.from_euler('z', 180, degrees=True)

  cameras = []
  for idx, view in enumerate(AISTDataset.VIEWS):
    if view == 'c09':
      yaw = R.from_euler('y', 180, degrees=True)
      tvec = [0, 170, 500]
    else:
      # 45-degree (360/8) increments around the subject.
      yaw = R.from_euler('y', 180 - 360 // 8 * idx, degrees=True)
      tvec = [0, 180, 500]
    rvec = (yaw * roll_flip).as_rotvec()

    # Build a fresh intrinsics matrix per camera so later bundle adjustment
    # can refine each one independently.
    intrinsics = np.array([
        [focal, 0, width // 2],
        [0, focal, height // 2],
        [0, 0, 1],
    ], dtype=np.float32)
    cameras.append(
        aniposelib.cameras.Camera(
            matrix=intrinsics, rvec=rvec, tvec=tvec, name=view,
            size=(width, height)))
  return aniposelib.cameras.CameraGroup(cameras)


def main(_):
  """Estimates camera parameters for each AIST++ environment.

  For every environment: initializes a manual camera guess, gathers 2D
  keypoints from a random subset of its sequences, runs iterative bundle
  adjustment, and dumps the refined cameras to `<save_dir>/<env_name>.json`.
  Optionally visualizes the result with vedo.
  """
  aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir)

  for env_name, seq_names in aist_dataset.mapping_env2seq.items():
    # Init camera parameters
    cgroup = init_env_cameras()

    # Select a set of sequences for optimizing camera parameters.
    # NOTE(review): random.choices samples WITH replacement, so the same
    # sequence may appear more than once — confirm random.sample was not
    # intended.
    seq_names = random.choices(seq_names, k=20)

    # Load 2D keypoints
    keypoints2d_all = []
    for seq_name in seq_names:
      keypoints2d_raw, _, _ = AISTDataset.load_keypoint2d(
          aist_dataset.keypoint2d_dir, seq_name=seq_name)
      # Special cases: NaN masks exclude these views from bundle adjustment.
      if seq_name == 'gBR_sBM_cAll_d04_mBR0_ch01':
        keypoints2d_raw[4] = np.nan  # not synced view
      if seq_name == 'gJB_sBM_cAll_d07_mJB3_ch05':
        keypoints2d_raw[6] = np.nan  # size 640x480
      keypoints2d_all.append(keypoints2d_raw)
    # Concatenate along axis 1 — presumably the frame/time axis, with axis 0
    # being the view axis (see the nviews read below); TODO confirm against
    # AISTDataset.load_keypoint2d.
    keypoints2d_all = np.concatenate(keypoints2d_all, axis=1)

    # Filter keypoints to select those best points: mask out detections whose
    # confidence score (last channel) falls below the threshold.
    kpt_thre = 0.5
    ignore_idxs = np.where(keypoints2d_all[:, :, :, 2] < kpt_thre)
    keypoints2d_all[ignore_idxs[0], ignore_idxs[1], ignore_idxs[2], :] = np.nan
    # Drop the confidence channel; keep only (x, y).
    keypoints2d_all = keypoints2d_all[..., 0:2]

    # Apply bundle adjustment and dump the camera parameters
    nviews = keypoints2d_all.shape[0]
    cgroup.bundle_adjust_iter(
        keypoints2d_all.reshape(nviews, -1, 2),
        n_iters=20,
        n_samp_iter=500,
        n_samp_full=5000,
        verbose=True)
    os.makedirs(FLAGS.save_dir, exist_ok=True)
    camera_file = os.path.join(FLAGS.save_dir, f'{env_name}.json')
    with open(camera_file, 'w') as f:
      json.dump([camera.get_dict() for camera in cgroup.cameras], f)

    # visualize the world with one frame
    if FLAGS.visualize:
      # NOTE(review): `seq_name` here is the variable leaked from the loading
      # loop above, i.e. only the LAST selected sequence is printed.
      print("seq_name:", seq_name)
      axes_all = plot_cameras(cgroup)
      # Triangulate the first frame's keypoints to sanity-check the cameras.
      keypoints3d = cgroup.triangulate(
          keypoints2d_all[:, 0].reshape(nviews, -1, 2)
      ).reshape(-1, 3)
      vedo.show(
        *axes_all, vedo.Points(keypoints3d, r=12), 
        interactive=True, axes=True)
      vedo.clear()


# absl entry point: parses the flags defined above, then invokes main.
if __name__ == '__main__':
  app.run(main)