File size: 5,008 Bytes
2d5fdd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# coding=utf-8
# Copyright 2020 The Google AI Perception Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Estimate AIST++ camera parameters."""
import json
import math
import os
import random

from absl import app
from absl import flags
from aist_plusplus.loader import AISTDataset
import aniposelib
import numpy as np
import vedo
import cv2
from scipy.spatial.transform import Rotation as R

# Command-line flags for input annotations, output location, and debugging.
# NOTE(review): 'dictionary' in the help strings below reads like a typo for
# 'directory', but help text is runtime output — left unchanged here.
FLAGS = flags.FLAGS
flags.DEFINE_string(
    'anno_dir',
    '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/',
    'input local dictionary for AIST++ annotations.')
flags.DEFINE_string(
    'save_dir',
    '/usr/local/google/home/ruilongli/data/public/aist_plusplus_final/cameras/',
    'output local dictionary that stores AIST++ camera parameters.')
flags.DEFINE_bool(
    'visualize', False,
    'Whether to visualize the cameras for debugging.')
# Fix both RNGs so sequence selection (and any stochastic optimization steps
# that consume these generators) is reproducible across runs.
random.seed(0)
np.random.seed(0)


def plot_cameras(cgroup):
  """Builds vedo actors showing the world axes and each camera in `cgroup`.

  For the world origin and for every camera center, three arrows are drawn
  (x: red, y: green, z: blue); each camera also gets a text label with its
  name. Returns the flat list of vedo actors, ready for `vedo.show`.
  """
  # Axis tip points, expressed in world coordinates (40-unit arrows).
  axis_tips = np.array([
      [40., 0., 0.],  # x axis: red
      [0., 40., 0.],  # y axis: green
      [0., 0., 40.],  # z axis: blue
  ])
  axis_colors = ('r', 'g', 'b')
  origin = [0, 0, 0]

  actors = []
  # World coordinate frame at the origin.
  for tip, color in zip(axis_tips, axis_colors):
    actors.append(vedo.Arrows([origin], [tip]).c(color))

  for camera in cgroup.cameras:
    # rvec/tvec map world -> camera; invert the rotation to go camera -> world.
    rot_world_from_cam = np.linalg.inv(cv2.Rodrigues(camera.rvec)[0])
    # Camera center in world coordinates: C = -R^{-1} t.
    center = -rot_world_from_cam.dot(camera.tvec)
    # Rotate the axis tips into this camera's orientation (world frame).
    tips_world = np.einsum('ij,kj->ki', rot_world_from_cam, axis_tips)
    for tip, color in zip(tips_world, axis_colors):
      actors.append(vedo.Arrows([center], [center + tip]).c(color))
    actors.append(vedo.Text(camera.name, center, s=10))
  return actors


def init_env_cameras():
  """Builds a hand-tuned initial guess for the AIST++ camera rig.

  Every view shares the same guessed intrinsics (f=1600, principal point at
  the image center of a 1920x1080 frame). Extrinsics place camera 'c09' facing
  the stage head-on and spread the remaining cameras at 45-degree yaw steps
  around it; a 180-degree roll flips the frame into image convention.

  Returns:
    An `aniposelib.cameras.CameraGroup` with one camera per AISTDataset view.
  """
  focal = 1600
  width, height = 1920, 1080
  # Roll by 180 degrees about z, composed with each camera's yaw.
  roll_flip = R.from_euler('z', 180, degrees=True)

  cameras = []
  for idx, view in enumerate(AISTDataset.VIEWS):
    if view == 'c09':
      yaw = R.from_euler('y', 180, degrees=True)
      tvec = [0, 170, 500]
    else:
      # 45-degree (360/8) increments around the subject.
      yaw = R.from_euler('y', 180 - 360 // 8 * idx, degrees=True)
      tvec = [0, 180, 500]
    rvec = (yaw * roll_flip).as_rotvec()

    # Build a fresh intrinsics matrix per camera so later bundle adjustment
    # can refine each one independently.
    intrinsics = np.array([
        [focal, 0, width // 2],
        [0, focal, height // 2],
        [0, 0, 1],
    ], dtype=np.float32)
    cameras.append(
        aniposelib.cameras.Camera(
            matrix=intrinsics, rvec=rvec, tvec=tvec, name=view,
            size=(width, height)))
  return aniposelib.cameras.CameraGroup(cameras)


def main(_):
  """Estimates camera parameters for each AIST++ environment.

  For every environment: initializes a manual camera guess, gathers 2D
  keypoints from a random subset of its sequences, runs iterative bundle
  adjustment, and dumps the refined cameras to `<save_dir>/<env_name>.json`.
  Optionally visualizes the result with vedo.
  """
  aist_dataset = AISTDataset(anno_dir=FLAGS.anno_dir)

  for env_name, seq_names in aist_dataset.mapping_env2seq.items():
    # Init camera parameters
    cgroup = init_env_cameras()

    # Select a set of sequences for optimizing camera parameters.
    # NOTE(review): random.choices samples WITH replacement, so the same
    # sequence may appear more than once — confirm random.sample was not
    # intended.
    seq_names = random.choices(seq_names, k=20)

    # Load 2D keypoints
    keypoints2d_all = []
    for seq_name in seq_names:
      keypoints2d_raw, _, _ = AISTDataset.load_keypoint2d(
          aist_dataset.keypoint2d_dir, seq_name=seq_name)
      # Special cases: NaN masks exclude these views from bundle adjustment.
      if seq_name == 'gBR_sBM_cAll_d04_mBR0_ch01':
        keypoints2d_raw[4] = np.nan  # not synced view
      if seq_name == 'gJB_sBM_cAll_d07_mJB3_ch05':
        keypoints2d_raw[6] = np.nan  # size 640x480
      keypoints2d_all.append(keypoints2d_raw)
    # Concatenate along axis 1 — presumably the frame/time axis, with axis 0
    # being the view axis (see the nviews read below); TODO confirm against
    # AISTDataset.load_keypoint2d.
    keypoints2d_all = np.concatenate(keypoints2d_all, axis=1)

    # Filter keypoints to select those best points: mask out detections whose
    # confidence score (last channel) falls below the threshold.
    kpt_thre = 0.5
    ignore_idxs = np.where(keypoints2d_all[:, :, :, 2] < kpt_thre)
    keypoints2d_all[ignore_idxs[0], ignore_idxs[1], ignore_idxs[2], :] = np.nan
    # Drop the confidence channel; keep only (x, y).
    keypoints2d_all = keypoints2d_all[..., 0:2]

    # Apply bundle adjustment and dump the camera parameters
    nviews = keypoints2d_all.shape[0]
    cgroup.bundle_adjust_iter(
        keypoints2d_all.reshape(nviews, -1, 2),
        n_iters=20,
        n_samp_iter=500,
        n_samp_full=5000,
        verbose=True)
    os.makedirs(FLAGS.save_dir, exist_ok=True)
    camera_file = os.path.join(FLAGS.save_dir, f'{env_name}.json')
    with open(camera_file, 'w') as f:
      json.dump([camera.get_dict() for camera in cgroup.cameras], f)

    # visualize the world with one frame
    if FLAGS.visualize:
      # NOTE(review): `seq_name` here is the variable leaked from the loading
      # loop above, i.e. only the LAST selected sequence is printed.
      print("seq_name:", seq_name)
      axes_all = plot_cameras(cgroup)
      # Triangulate the first frame's keypoints to sanity-check the cameras.
      keypoints3d = cgroup.triangulate(
          keypoints2d_all[:, 0].reshape(nviews, -1, 2)
      ).reshape(-1, 3)
      vedo.show(
        *axes_all, vedo.Points(keypoints3d, r=12), 
        interactive=True, axes=True)
      vedo.clear()


# absl entry point: parses the flags defined above, then invokes main.
if __name__ == '__main__':
  app.run(main)